# The pandas data science library that gives the 'spreadsheet'-like capabilities to python
import pandas as pd
# The matplotlip library is used to visualize data
import matplotlib.pyplot as plt
import matplotlib as mpl
# The pathlib library is used to manage access to disk. It helps us to keep things the same between Windows and Linux
from pathlib import Path
# The os library is used to list and access files and directories on the hard drive
import os, sys
# Numpy is the numerical library. It is used by many of the ones before, but we import it for in case
import numpy as np
# From Numpy we also import the nan 'missing-value' object that we use alot
from numpy import nan
# For fancy matching of strings, we use re
import re
# String libary for temporary file import
from io import StringIO
# Create tooltips for interactive plots
import mpld3
# For connecting to google sheet
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import xlrd
import datetime
from scipy import interpolate
from windrose import WindroseAxes
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
import matplotlib.cm as cm
from matplotlib.dates import DateFormatter
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import seaborn as sns
sns.set_theme(style="whitegrid")
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Configure the connection
scope = ['https://spreadsheets.google.com/feeds']
# Give the path to the Service Account Credential json file
# 1. Go to Google Development Console (https://console.developers.google.com/project)
# 2. Create new project
# 3. Go to “APIs & Services > Credentials” and choose “Create credentials > Service account key”.
# 4. Fill out the form
# 5. Click “Create key”
# 6. Select “JSON” and click “Create”
credentials = ServiceAccountCredentials.from_json_keyfile_name('/home/roelof/jupyter/key/crgprojects-b66f872bd816.json',
scope)
# Authorise your Notebook
gc = gspread.authorize(credentials)
# The sprad sheet ID, which can be taken from the link to the sheet
spreadsheet_key = '1_SjistxWekMyf_w59pu5vHDrhOfmeHLurB47mfq_ahM'
#Opening the worksheet by using Worksheet ID
def readSheet():
workbook = gc.open_by_key(spreadsheet_key)#Selecting which sheet to pulling the data
stations = workbook.worksheet('Overview')#Pulling the data and transform it to the data frame
values = stations.get_all_values()
Stations = pd.DataFrame(values[1:], columns=values[0])
return(stations,Stations)
stations, Stations = readSheet()
def head(filename,N=10):
n=0
with open(Path(filename)) as fp:
for l in fp.readlines():
if n<N:
print(l,end="")
n=n+1
def tail(filename,N=10):
n=0
with open(Path(filename)) as fp:
lines=[i for i in fp.readlines()]
Nlines=len(lines)
for l in lines[Nlines-N:]:
if n<N:
print(l,end="")
n=n+1
def ConvertNumeric(df,var):
df[var]= pd.to_numeric(df[var],errors='coerce')
def testLogger(filename):
Sep=","
Data=[]
with open(filename) as fp:
for line in fp.readlines():
row=line.split(Sep)
Data.append(len(row))
print(pd.DataFrame({"Columns":Data}).describe())
def ncolumnsLogger(filename):
Sep=","
Data=[]
with open(filename) as fp:
for line in fp.readlines():
row=line.split(Sep)
Data.append(len(row))
return(np.median(np.array(Data)))
def readLogger(filename, nLines=18):
DataString=""
Sep=","
with open(filename) as fp:
for line in fp.readlines():
row=line.split(Sep)
if isinstance(nLines, list):
if len(row) in nLines and re.match('\W*(\d{4})[/.-](\d{2})[/.-](\d{2})\s{1}(\d{2}):(\d{2}):(\d{2})\W*',row[0]):
DataString=DataString+line
else:
if len(row) == nLines and re.match('\W*(\d{4})[/.-](\d{2})[/.-](\d{2})\s{1}(\d{2}):(\d{2}):(\d{2})\W*',row[0]):
DataString=DataString+line
return(DataString)
def readRM(filename, nLines=18):
DataString=""
Sep=","
with open(filename) as fp:
for line in fp.readlines():
row=line.split(Sep)
if len(row) == nLines and re.match('\W*(\d{4})[/.-](\d{2})[/.-](\d{2})\s{1}(\d{2}):(\d{2}):(\d{2})\W*',row[0]):
DataString=DataString+line
return(DataString)
def lsR(dir,pattern=".csv",prefix="",postfix=""):
list=[]
for path,dirs,files in os.walk(Path(dir)):
for f in files:
if f.count(pattern) > 0 and f.startswith(prefix) and f.endswith(postfix):
list.append(os.path.join(path,f))
return(list)
def clearplots():
plt.clf()
def plotdiurnalstats(df,var='PM10_logger (ug/m3)'):
df['Time'] = df.index.map(lambda x: x.strftime("%H:00"))
diurnal = df[['Time',var]].groupby('Time').describe()[var]
fig, ax = plt.subplots(1, figsize=(7,6))
ax.set_ylabel(var, fontsize=14, weight='bold')
ax.set_xlabel('Time of Day', fontsize=14)
print("Plotting mean")
ax.plot(diurnal.index, diurnal['50%'], 'g', linewidth=2.0,label=var)
print("Plotting 75%")
ax.plot(diurnal.index, diurnal['75%'], color='g')
print("Plotting 25%")
ax.plot(diurnal.index, diurnal['25%'], color='g')
ax.fill_between(diurnal.index, diurnal['50%'], diurnal['75%'], alpha=.25, facecolor='g')
ax.fill_between(diurnal.index, diurnal['50%'], diurnal['25%'], alpha=.25, facecolor='g')
ax.set_xlim(0, 23);
ax.set_xticks([i for i in range(0,24,3)])
plt.tight_layout()
def plotdiurnal(df,var=''):
df['decimalhour'] = df.index.hour+(df.index.minute/60)
fig, ax = plt.subplots(figsize=(7,6))
ax.set_ylabel(var, fontsize=14, weight='bold')
ax.set_xlabel('Time of Day', fontsize=14)
ax.plot(df['decimalhour'].values, df[var].values, 'o', markersize=2, linewidth=0, label=var, alpha=0.3)
#def format_coord(x, y):
# z=np.array_str(df[(df['decimalhour']==x)&(df[var]==y)].index.map(lambda x: x.strftime("%y/%m/%d %H:%M")).values)[1:34]
# return('x={}, y={}, date={}'%(x, y,z))
#ax.format_coord = format_coord
ax.set_xlim(0, 23);
ax.set_xticks([i for i in range(0,24,3)])
plt.tight_layout()
df=df.drop(['decimalhour'], axis = 1)
def plottimeseries(df,var=''):
fig, ax = plt.subplots(1,figsize=(7,6))
df[[var]].plot(ax=ax)
ax.set_ylabel(var, fontsize=14, weight='bold')
ax.set_xlabel('', fontsize=14)
# Default function to read data format from SAAQIS database
def custom_date_parser(time):
NoDates=['Max Date', 'Max Time', 'Minimum', 'Min Date', 'Min Time','Avg','Std','Data Percent','Num', 'Maximum']
if time not in NoDates:
try:
time = datetime.datetime.strptime(time, "%H:%M %d/%m/%Y")
except ValueError:
time = time.replace('24:', '23:')
time = datetime.datetime.strptime(time, "%H:%M %d/%m/%Y")
time += datetime.timedelta(hours=1)
except:
return(np.nan)
return(time)
# This 'widget' is used to give us interactive graph capabilities in the matplotlib figures
%matplotlib widget
HomeDir=Path('/home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018')
Data for each province is in corresponding directory
os.listdir('/home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20210101')
['GP', 'FS', 'MP', 'DEFF', 'Limpopo', 'EC', 'WC', 'NW', 'SASOL', 'Eskom', 'SAWS', 'KZN', 'NC']
os.listdir(HomeDir)
['Gauteng', 'Mpumalanga', 'Limpopo', 'SAAQIS_raw.csv', 'North_West']
HomeDir
PosixPath('/home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018')
import re
string = 'abcd1234efg567'
newstring = re.sub(r'[0-9]+', '', string)
print(newstring)
abcdefg
DiskStations={}
for s in lsR('/home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20210101', 'xlsx'):
station=s.split('/')[-1][:-5]
station = re.sub(r'[0-9| ]+', '', station)
authority=s.split('/')[-2]
if s not in DiskStations.keys():
DiskStations[station]=authority
for s in DiskStations:
print(s,DiskStations[s])
DiskStations
PTAWest Tshwane ekandustria Tshwane booysens Tshwane mamelodi Tshwane Hammanskraal Tshwane bodibeng Tshwane rosslyn Tshwane olievenhoutbosch Tshwane tshwanemarket Tshwane rebecca_diepkloof_data DEFF kliprivier DEFF diepkloof DEFF sebokeng DEFF threerivers DEFF sharpeville DEFF diepkloof_pm.bigger DEFF meyerton Sedibeng vanderbijlpark Sedibeng nwuvaalcampus NWU mogalecity Westrand Moonies Westrand randfontein Westrand wattville Ekurhuleni springs Ekurhuleni olifantsfontein Ekurhuleni thokoza Ekurhuleni tsakane Ekurhuleni bedfordview Ekurhuleni tembisa Ekurhuleni etwatwa Ekurhuleni Randwater Eskom alex CoJ ivorypark CoJ diepsloot CoJ jabavu CoJ orangefarm CoJ newtown CoJ buccleugh CoJ deltapark CoJ zamdela DEFF bonganimabasoecopark SASOL leitrim SASOL ajjacobs SASOL pelonomi Mangaung balfour MP middelburg DEFF emalahleni MP delmas MP standerton MP mhluzi steve tshwete ermelo DEFF hendrina DEFF secunda DEFF witbank DEFF embalenhlenorth SASOL bosjesspruit SASOL embalenhle SASOL club SASOL grootvleisasol SASOL embalenhlesouth SASOL lebohang SASOL Phola Eskom Majuba Eskom Grootvlei Eskom KendalPoultry Eskom Komati Eskom Camden Eskom Kwazamokuhle Eskom Verkykkop Eskom Elandsfontein Eskom Ezamukuhle Eskom Grootdraaidam Eskom Leandra Eskom Krielvillage Eskom Medupi Eskom krielvillage Eskom thabazimbi DEFF mokopane DEFF lephalale DEFF xanadu DEFF dilokong Sekhukhune phalaborwa Limpopo steelpoort Limpopo Marapong Eskom capricorn Capricorn uitenhage N Mandela algoaparkclinic N Mandela walmer N Mandela motherwelldayhospital N Mandela motherwell Coega Development Corporation saltworks Coega Development Corporation mobile Buffalo City eastlondon Buffalo City zwelitsha Buffalo City wallacedene CoCT atlantis CoCT bellvillesouth CoCT khayelitsha CoCT cityhall CoCT goodwood CoCT foreshore CoCT tableview CoCT Saldanabay Saldana Bay george WC worcester WC driftsands WC malmesbury WC hermanus WC stellenbosch WC khayelitshaWC WC capepoint SAWS welgegund NWU jouberton NW kanana NW phokeng NW damonsville NW mafikeng NW khuma NW lichtenburg NW marikanacommunitycentre Rustenburg boitekong Rustenburg tlhabane Rustenburg Verkykkop_ Eskom Phola_ Eskom Majuba_ Eskom Leandra_ Eskom Grootdraaidam_ Eskom karoo SAWS felixton RBCAA brackenham Umhlathuze cbd RBCAA mtunzini RBCAA enseleni RBCAA esikhawini RBCAA esikhaleni Umhlathuze arboretumwaterfacility Umhlathuze pietermaritzburgkzn KZN empangeni KZN escourt KZN pietermaritzburgairportoribi Msunduzi wentworthreservior Ethekwini ganges Ethekwini southernworks Ethekwini settlers Ethekwini alverstone Ethekwini cityhalldurban Ethekwini
{'PTAWest': 'Tshwane',
'ekandustria': 'Tshwane',
'booysens': 'Tshwane',
'mamelodi': 'Tshwane',
'Hammanskraal': 'Tshwane',
'bodibeng': 'Tshwane',
'rosslyn': 'Tshwane',
'olievenhoutbosch': 'Tshwane',
'tshwanemarket': 'Tshwane',
'rebecca_diepkloof_data': 'DEFF',
'kliprivier': 'DEFF',
'diepkloof': 'DEFF',
'sebokeng': 'DEFF',
'threerivers': 'DEFF',
'sharpeville': 'DEFF',
'diepkloof_pm.bigger': 'DEFF',
'meyerton': 'Sedibeng',
'vanderbijlpark': 'Sedibeng',
'nwuvaalcampus': 'NWU',
'mogalecity': 'Westrand',
'Moonies': 'Westrand',
'randfontein': 'Westrand',
'wattville': 'Ekurhuleni',
'springs': 'Ekurhuleni',
'olifantsfontein': 'Ekurhuleni',
'thokoza': 'Ekurhuleni',
'tsakane': 'Ekurhuleni',
'bedfordview': 'Ekurhuleni',
'tembisa': 'Ekurhuleni',
'etwatwa': 'Ekurhuleni',
'Randwater': 'Eskom',
'alex': 'CoJ',
'ivorypark': 'CoJ',
'diepsloot': 'CoJ',
'jabavu': 'CoJ',
'orangefarm': 'CoJ',
'newtown': 'CoJ',
'buccleugh': 'CoJ',
'deltapark': 'CoJ',
'zamdela': 'DEFF',
'bonganimabasoecopark': 'SASOL',
'leitrim': 'SASOL',
'ajjacobs': 'SASOL',
'pelonomi': 'Mangaung',
'balfour': 'MP',
'middelburg': 'DEFF',
'emalahleni': 'MP',
'delmas': 'MP',
'standerton': 'MP',
'mhluzi': 'steve tshwete',
'ermelo': 'DEFF',
'hendrina': 'DEFF',
'secunda': 'DEFF',
'witbank': 'DEFF',
'embalenhlenorth': 'SASOL',
'bosjesspruit': 'SASOL',
'embalenhle': 'SASOL',
'club': 'SASOL',
'grootvleisasol': 'SASOL',
'embalenhlesouth': 'SASOL',
'lebohang': 'SASOL',
'Phola': 'Eskom',
'Majuba': 'Eskom',
'Grootvlei': 'Eskom',
'KendalPoultry': 'Eskom',
'Komati': 'Eskom',
'Camden': 'Eskom',
'Kwazamokuhle': 'Eskom',
'Verkykkop': 'Eskom',
'Elandsfontein': 'Eskom',
'Ezamukuhle': 'Eskom',
'Grootdraaidam': 'Eskom',
'Leandra': 'Eskom',
'Krielvillage': 'Eskom',
'Medupi': 'Eskom',
'krielvillage': 'Eskom',
'thabazimbi': 'DEFF',
'mokopane': 'DEFF',
'lephalale': 'DEFF',
'xanadu': 'DEFF',
'dilokong': 'Sekhukhune',
'phalaborwa': 'Limpopo',
'steelpoort': 'Limpopo',
'Marapong': 'Eskom',
'capricorn': 'Capricorn',
'uitenhage': 'N Mandela',
'algoaparkclinic': 'N Mandela',
'walmer': 'N Mandela',
'motherwelldayhospital': 'N Mandela',
'motherwell': 'Coega Development Corporation',
'saltworks': 'Coega Development Corporation',
'mobile': 'Buffalo City',
'eastlondon': 'Buffalo City',
'zwelitsha': 'Buffalo City',
'wallacedene': 'CoCT',
'atlantis': 'CoCT',
'bellvillesouth': 'CoCT',
'khayelitsha': 'CoCT',
'cityhall': 'CoCT',
'goodwood': 'CoCT',
'foreshore': 'CoCT',
'tableview': 'CoCT',
'Saldanabay': 'Saldana Bay',
'george': 'WC',
'worcester': 'WC',
'driftsands': 'WC',
'malmesbury': 'WC',
'hermanus': 'WC',
'stellenbosch': 'WC',
'khayelitshaWC': 'WC',
'capepoint': 'SAWS',
'welgegund': 'NWU',
'jouberton': 'NW',
'kanana': 'NW',
'phokeng': 'NW',
'damonsville': 'NW',
'mafikeng': 'NW',
'khuma': 'NW',
'lichtenburg': 'NW',
'marikanacommunitycentre': 'Rustenburg',
'boitekong': 'Rustenburg',
'tlhabane': 'Rustenburg',
'Verkykkop_': 'Eskom',
'Phola_': 'Eskom',
'Majuba_': 'Eskom',
'Leandra_': 'Eskom',
'Grootdraaidam_': 'Eskom',
'karoo': 'SAWS',
'felixton': 'RBCAA',
'brackenham': 'Umhlathuze',
'cbd': 'RBCAA',
'mtunzini': 'RBCAA',
'enseleni': 'RBCAA',
'esikhawini': 'RBCAA',
'esikhaleni': 'Umhlathuze',
'arboretumwaterfacility': 'Umhlathuze',
'pietermaritzburgkzn': 'KZN',
'empangeni': 'KZN',
'escourt': 'KZN',
'pietermaritzburgairportoribi': 'Msunduzi',
'wentworthreservior': 'Ethekwini',
'ganges': 'Ethekwini',
'southernworks': 'Ethekwini',
'settlers': 'Ethekwini',
'alverstone': 'Ethekwini',
'cityhalldurban': 'Ethekwini'}
#pd.Series(data=Stations, name='Stations')
#help(pd.DataFrame)
def readStation(file, authority, var):
# Read the data file
df=pd.read_excel(file,
skiprows=[0,1,3,4],
parse_dates=[0],
date_parser=custom_date_parser,
index_col=0,
na_values=['', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan','1.#IND','1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a','nan', 'null','-999.0']
)
df.index.name='Date'
# Remove lines with a bad date index
df=df.loc[~df.index.isnull()]
# Remove lines where all data is missing
df=df.dropna(axis=0, how='all')
# Make sure that the variables are numeric
df=df.apply(pd.to_numeric, errors='ignore')
# Create short version
df=df.stack()
df=df.reset_index(level=[1])
df.columns=['Site','Value']
df['Variable']=var
df['Unit']='ppm'
df['Status']=0
df['Authority']=authority
df=df.reset_index()
df=df.set_index((df.Site + df.Variable + df.Unit + df.Date.astype(str) + df.Value.astype(str)).apply(hash))
return(df)
#help(pd.read_excel)
FixVars={'Wdir':'WDir',
'AmbRelH':'RH',
'Amb WDirection':'WindDir',
'SolRad':'SolarRadiation',
'Amb WSpeed':'WindSpeed',
'AmbTemp':'Temperature'}
dfRaw = pd.DataFrame()
for f in lsR(HomeDir, 'xlsx'):
Filename=f.split('/')[-1]
Authority=Filename.split('_')[0]
Variable=Filename.split('_')[1]
print(Authority,Variable,Filename,f)
dftmp=readStation(f,Authority,Variable)
dfRaw=pd.concat([dfRaw,dftmp])
dfRaw['Variable']=dfRaw['Variable'].replace(FixVars, regex=True)
GJA CO GJA_CO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_CO_2017_2018.xlsx GJA NO2 GJA_NO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO2_2018_2019.xlsx GJA NO GJA_NO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO_2019_2020.xlsx GJA AmbRelH GJA_AmbRelH_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2017.xlsx GJA CO GJA_CO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_CO_2016_2017.xlsx GJA Amb WDirection GJA_Amb WDirection_2016.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2016.xlsx GJA PM10 GJA_PM10_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM10_2016_2017.xlsx GJA Amb WDirection GJA_Amb WDirection_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2020.xlsx GJA AmbRelH GJA_AmbRelH_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2021.xlsx GJA PM2.5 GJA_PM2.5_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM2.5_2016_2017.xlsx GJA AmbRelH GJA_AmbRelH_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2018.xlsx GJA SolRad GJA_SolRad_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2020.xlsx GJA O3 GJA_O3_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_O3_2016_2017.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2016.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2016.xlsx GJA NO2 GJA_NO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO2_2016_2017.xlsx GJA AmbTemp GJA_AmbTemp_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2020.xlsx GJA AmbTemp GJA_AmbTemp_2016.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2016.xlsx GJA NOx GJA_NOx_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NOx_2016_2017.xlsx GJA Amb WDirection GJA_Amb WDirection_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2019.xlsx GJA AmbTemp GJA_AmbTemp_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2019.xlsx GJA PM10 GJA_PM10_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM10_2018_2019.xlsx GJA NO2 GJA_NO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO2_2020_2021.xlsx GJA O3 GJA_O3_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_O3_2018_2019.xlsx GJA PM2.5 GJA_PM2.5_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM2.5_2018_2019.xlsx GJA SO2 GJA_SO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SO2_2016_2017.xlsx GJA SO2 GJA_SO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SO2_2017_2018.xlsx GJA NOx GJA_NOx_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NOx_2017_2018.xlsx GJA PM2.5 GJA_PM2.5_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM2.5_2020_2021.xlsx GJA SolRad GJA_SolRad_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2019.xlsx GJA AmbTemp GJA_AmbTemp_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2021.xlsx GJA Amb WDirection GJA_Amb WDirection_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2017.xlsx GJA SolRad GJA_SolRad_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2021.xlsx GJA NOx GJA_NOx_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NOx_2018_2019.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2019.xlsx GJA Amb WDirection GJA_Amb WDirection_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2021.xlsx GJA PM10 GJA_PM10_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM10_2020_2021.xlsx GJA NO GJA_NO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO_2016_2017.xlsx GJA SO2 GJA_SO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SO2_2020_2021.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2018.xlsx GJA AmbRelH GJA_AmbRelH_2016.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2016.xlsx GJA CO GJA_CO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_CO_2019_2020.xlsx GJA CO GJA_CO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_CO_2020_2021.xlsx GJA AmbTemp GJA_AmbTemp_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2018.xlsx GJA PM2.5 GJA_PM2.5_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM2.5_2017_2018.xlsx GJA NO GJA_NO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO_2020_2021.xlsx GJA NO2 GJA_NO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO2_2017_2018.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2020.xlsx GJA NO GJA_NO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO_2017_2018.xlsx GJA AmbTemp GJA_AmbTemp_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbTemp_2017.xlsx GJA PM10 GJA_PM10_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM10_2019_2020.xlsx GJA O3 GJA_O3_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_O3_2019_2020.xlsx GJA O3 GJA_O3_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_O3_2020_2021.xlsx GJA CO GJA_CO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_CO_2018_2019.xlsx GJA O3 GJA_O3_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_O3_2017_2018.xlsx GJA PM10 GJA_PM10_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM10_2017_2018.xlsx GJA SO2 GJA_SO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SO2_2019_2020.xlsx GJA SO2 GJA_SO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SO2_2018_2019.xlsx GJA SolRad GJA_SolRad_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2017.xlsx GJA PM2.5 GJA_PM2.5_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_PM2.5_2019_2020.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2021.xlsx GJA SolRad GJA_SolRad_2016.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2016.xlsx GJA SolRad GJA_SolRad_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_SolRad_2018.xlsx GJA NO2 GJA_NO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO2_2019_2020.xlsx GJA NOx GJA_NOx_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NOx_2020_2021.xlsx GJA Amb WSpeed GJA_Amb WSpeed_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WSpeed_2017.xlsx GJA Amb WDirection GJA_Amb WDirection_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_Amb WDirection_2018.xlsx GJA AmbRelH GJA_AmbRelH_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2019.xlsx GJA AmbRelH GJA_AmbRelH_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_AmbRelH_2020.xlsx GJA NO GJA_NO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NO_2018_2019.xlsx GJA NOx GJA_NOx_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Gauteng/GJA_NOx_2019_2020.xlsx MP NO2 MP_NO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO2_2017_2018.xlsx MP CO MP_CO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_CO_2019_2020.xlsx MP NO2 MP_NO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO2_2019_2020.xlsx MP SO2 MP_SO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_SO2_2016_2017.xlsx MP NO2 MP_NO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO2_2018_2019.xlsx MP NO MP_NO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO_2020_2021.xlsx MP SO2 MP_SO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_SO2_2019_2020.xlsx MP PM10 MP_PM10_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM10_2017_2018.xlsx MP O3 MP_O3_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_O3_2020_2021.xlsx MP PM10 MP_PM10_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM10_2019_2020.xlsx MP O3 MP_O3_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_O3_2018_2019.xlsx MP NOx MP_NOx_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NOx_2017_2018.xlsx MP PM2.5 MP_PM2.5_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM2.5_2017_2018.xlsx MP NOx MP_NOx_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NOx_2019_2020.xlsx MP PM10 MP_PM10_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM10_2020_2021.xlsx MP PM2.5 MP_PM2.5_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM2.5_2018_2019.xlsx MP NO2 MP_NO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO2_2020_2021.xlsx MP NO MP_NO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO_2016_2017.xlsx MP NO MP_NO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO_2017_2018.xlsx MP O3 MP_O3_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_O3_2017_2018.xlsx MP PM2.5 MP_PM2.5_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM2.5_2020_2021.xlsx MP O3 MP_O3_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_O3_2019_2020.xlsx MP CO MP_CO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_CO_2018_2019.xlsx MP PM10 MP_PM10_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM10_2016_2017.xlsx MP NOx MP_NOx_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NOx_2018_2019.xlsx MP NO2 MP_NO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO2_2016_2017.xlsx MP CO MP_CO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_CO_2016_2017.xlsx MP PM10 MP_PM10_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM10_2018_2019.xlsx MP NO MP_NO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO_2019_2020.xlsx MP O3 MP_O3_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_O3_2016_2017.xlsx MP CO MP_CO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_CO_2020_2021.xlsx MP NOx MP_NOx_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NOx_2016_2017.xlsx MP SO2 MP_SO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_SO2_2017_2018.xlsx MP SO2 MP_SO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_SO2_2020_2021.xlsx MP NO MP_NO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NO_2018_2019.xlsx MP CO MP_CO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_CO_2017_2018.xlsx MP PM2.5 MP_PM2.5_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM2.5_2019_2020.xlsx MP SO2 MP_SO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_SO2_2018_2019.xlsx MP PM2.5 MP_PM2.5_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_PM2.5_2016_2017.xlsx MP NOx MP_NOx_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Mpumalanga/MP_NOx_2020_2021.xlsx LP PM2.5 LP_PM2.5_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM2.5_2020_2021.xlsx LP WSpeed LP_WSpeed_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WSpeed_2019_2020.xlsx LP SRad LP_SRad_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SRad_2017_2018.xlsx LP Wdir LP_Wdir_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Wdir_2018_2019.xlsx LP CO LP_CO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_CO_2018_2019.xlsx LP WSpeed LP_WSpeed_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WSpeed_2018_2019.xlsx LP O3 LP_O3_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_O3_2018_2019.xlsx LP NO2 LP_NO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO2_2018_2019.xlsx LP PM10 LP_PM10_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM10_2017_2018.xlsx LP PM2.5 LP_PM2.5_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM2.5_2018_2019.xlsx LP SRad LP_SRad_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SRad_2019_2020.xlsx LP NO LP_NO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO_2016_2017.xlsx LP RH LP_RH_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_RH_2016_2017.xlsx LP PM2.5 LP_PM2.5_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM2.5_2019_2020.xlsx LP SO2 LP_SO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SO2_2016_2017.xlsx LP SRad LP_SRad_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SRad_2020_2021.xlsx LP PM10 LP_PM10_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM10_2020_2021.xlsx LP PM2.5 LP_PM2.5_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM2.5_2017_2018.xlsx LP NOx LP_NOx_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NOx_2016_2017.xlsx LP O3 LP_O3_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_O3_2017_2018.xlsx LP NO2 LP_NO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO2_2020_2021.xlsx LP Temp LP_Temp_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Temp_2019_2020.xlsx LP WDir LP_WDir_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WDir_2019_2020.xlsx LP NO LP_NO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO_2018_2019.xlsx LP NO2 LP_NO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO2_2016_2017.xlsx LP RH LP_RH_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_RH_2020_2021.xlsx LP WSpeed LP_WSpeed_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WSpeed_2017_2018.xlsx LP Wdir LP_Wdir_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Wdir_2017_2018.xlsx LP WDir LP_WDir_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WDir_2020_2021.xlsx LP NO LP_NO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO_2020_2021.xlsx LP NOx LP_NOx_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NOx_2020_2021.xlsx LP O3 LP_O3_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_O3_2020_2021.xlsx LP PM10 LP_PM10_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM10_2016_2017.xlsx LP O3 LP_O3_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_O3_2019_2020.xlsx LP PM2.5 LP_PM2.5_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM2.5_2016_2017.xlsx LP Temp LP_Temp_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Temp_2018_2019.xlsx LP WSpeed LP_WSpeed_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WSpeed_2016_2017.xlsx LP NO LP_NO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO_2019_2020.xlsx LP SO2 LP_SO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SO2_2017_2018.xlsx LP CO LP_CO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_CO_2019_2020.xlsx LP SRad LP_SRad_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SRad_2018_2019.xlsx LP SO2 LP_SO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SO2_2020_2021.xlsx LP SO2 LP_SO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SO2_2018_2019.xlsx LP CO LP_CO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_CO_2020_2021.xlsx LP Wdir LP_Wdir_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Wdir_2016_2017.xlsx LP NOx LP_NOx_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NOx_2018_2019.xlsx LP NO2 LP_NO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO2_2017_2018.xlsx LP SRad LP_SRad_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SRad_2016_2017.xlsx LP SO2 LP_SO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_SO2_2019_2020.xlsx LP NO LP_NO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO_2017_2018.xlsx LP CO LP_CO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_CO_2017_2018.xlsx LP PM10 LP_PM10_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM10_2019_2020.xlsx LP PM10 LP_PM10_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_PM10_2018_2019.xlsx LP RH LP_RH_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_RH_2018_2019.xlsx LP WSpeed LP_WSpeed_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_WSpeed_2020_2021.xlsx LP CO LP_CO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_CO_2016_2017.xlsx LP RH LP_RH_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_RH_2017_2018.xlsx LP O3 LP_O3_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_O3_2016_2017.xlsx LP NOx LP_NOx_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NOx_2017_2018.xlsx LP Temp LP_Temp_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Temp_2016_2017.xlsx LP RH LP_RH_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_RH_2019_2020.xlsx LP NOx LP_NOx_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NOx_2019_2020.xlsx LP NO2 LP_NO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_NO2_2019_2020.xlsx LP Temp LP_Temp_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Temp_2017_2018.xlsx LP Temp LP_Temp_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/Limpopo/LP_Temp_2020_2021.xlsx NW PM10 NW_PM10_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM10_2016_2017.xlsx NW PM2.5 NW_PM2.5_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM2.5_2018_2019.xlsx NW O3 NW_O3_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_O3_2019_2020.xlsx NW NO2 NW_NO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO2_2019_2020.xlsx NW O3 NW_O3_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_O3_2016_2017.xlsx NW CO NW_CO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_CO_2020_2021.xlsx NW O3 NW_O3_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_O3_2017_2018.xlsx NW CO NW_CO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_CO_2019_2020.xlsx NW NO NW_NO_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO_2019_2020.xlsx NW SO2 NW_SO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_SO2_2018_2019.xlsx NW NO2 NW_NO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO2_2020_2021.xlsx NW NO2 NW_NO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO2_2017_2018.xlsx NW AmbTemp NW_AmbTemp_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_AmbTemp_2021.xlsx NW PM10 NW_PM10_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM10_2019_2020.xlsx NW CO NW_CO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_CO_2017_2018.xlsx NW NOx NW_NOx_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NOx_2018_2019.xlsx NW NOx NW_NOx_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NOx_2017_2018.xlsx NW NO NW_NO_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO_2017_2018.xlsx NW NO2 NW_NO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO2_2016_2017.xlsx NW O3 NW_O3_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_O3_2020_2021.xlsx NW PM2.5 NW_PM2.5_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM2.5_2017_2018.xlsx NW CO NW_CO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_CO_2016_2017.xlsx NW NO NW_NO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO_2018_2019.xlsx NW O3 NW_O3_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_O3_2018_2019.xlsx NW AmbTemp NW_AmbTemp_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_AmbTemp_2020.xlsx NW PM10 NW_PM10_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM10_2020_2021.xlsx NW NOx NW_NOx_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NOx_2016_2017.xlsx NW SO2 NW_SO2_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_SO2_2020_2021.xlsx NW PM2.5 NW_PM2.5_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM2.5_2016_2017.xlsx NW NOx NW_NOx_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NOx_2019_2020.xlsx NW NO NW_NO_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO_2016_2017.xlsx NW NO NW_NO_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO_2020_2021.xlsx NW NO2 NW_NO2_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NO2_2018_2019.xlsx NW CO NW_CO_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_CO_2018_2019.xlsx NW SO2 NW_SO2_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_SO2_2017_2018.xlsx NW PM10 NW_PM10_2018_2019.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM10_2018_2019.xlsx NW PM2.5 NW_PM2.5_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM2.5_2019_2020.xlsx NW NOx NW_NOx_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_NOx_2020_2021.xlsx NW PM2.5 NW_PM2.5_2020_2021.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM2.5_2020_2021.xlsx NW SO2 NW_SO2_2019_2020.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_SO2_2019_2020.xlsx NW SO2 NW_SO2_2016_2017.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_SO2_2016_2017.xlsx NW PM10 NW_PM10_2017_2018.xlsx /home/roelof/Dropbox (NWU)/CRG_Data/SAAQIS_Data/20211018/North_West/NW_PM10_2017_2018.xlsx
FixVars={'Wdir':'WDir',
'AmbRelH':'RH',
'Amb WDirection':'WindDir',
'SolRad':'SolarRadiation',
'Amb WSpeed':'WindSpeed',
'AmbTemp':'Temperature'}
dfRaw['Variable']=dfRaw['Variable'].replace(FixVars, regex=True)
Sites=dfRaw['Site'].unique()
Variables=dfRaw['Variable'].unique()
Sites.sort()
Sites
array(['Alexandra-NAQI', 'Balfour', 'Bedfordview-NAQI', 'Bodibeng-NAQI',
'Boitekong', 'Booysens', 'Bosjesspruit', 'Buccleugh-NAQI',
'Camden', 'Capricorn', 'Chicken Farm', 'Club-NAQI', 'Damonsville',
'Delmas MP', 'Diepkloof-NAQI', 'Diepsloot', 'Dilokong',
'Ekandustria', 'Elandsfontein-NAQI', 'Embalenhle', 'Ermelo-NAQI',
'Etwatwa-NAQI', 'Ezamokuhle', 'Ezamokuhle 2', 'Grootdraaidam',
'Grootvlei - Eskom', 'Grootvlei - Sasol', 'Hammanskraal',
'Hendrina - SAWS', 'Ivory Park', 'Jabavu-NAQI', 'Jouberton',
'KOMATI', 'Kanana', 'Kendal', 'Khuma', 'Kliprivier',
'Kriel Village', 'Kwazamokuhle', 'Lebohang', 'Leondale',
'Lephalale-NAQI', 'Lichtenburg', 'Mafikeng-NAQI', 'Majuba',
'Mamelodi', 'Marapong', 'Marikana CC-NAQI', 'Masakhane', 'Medupi',
'Meyerton', 'Mhluzi', 'Middelburg MP', 'Middelburg SAWS-NAQI',
'Mogale City', 'Mokopane', 'North West University Vaal campus',
'Olievenhoutbosch-NAQI', 'Olifantsfontein-NAQI', 'Orange Farm',
'PTA West', 'Phalaborwa-NAQI', 'Phokeng', 'Phola', 'Randfontein',
'Randwater', 'Rosslyn-NAQI', 'Sebokeng', 'Secunda', 'Sharpeville',
'Silobela', 'Sivukile', 'Springs Girls High School',
'Springs-NAQI', 'Standerton - MP Province',
'Station 2 (Molengraaf Street)', 'Station 6 (Naboom Street)',
'Station 9 (HME)', 'Steelpoort', 'Tembisa', 'Thabazimbi',
'Thbelihle', 'Thokoza-NAQI', 'Three Rivers', 'Tlhabane', 'Tsakane',
'Tshwane Market', 'Vanderbijlpark-NAQI', 'Wattville',
'Welgegund-NAQI', 'Xanadu-NAQI', 'eMalahleni - SAWS',
'eMalahleni -MP Province', 'eMbalenhle North', 'eMbalenhle South'],
dtype=object)
Variables
array(['CO', 'NO2', 'NO', 'RH', 'WindDir', 'PM10', 'PM2.5',
'SolarRadiation', 'O3', 'WindSpeed', 'Temperature', 'NOx', 'SO2',
'WSpeed', 'SRad', 'WDir', 'Temp'], dtype=object)
dfRaw.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 17133756 entries, 1852255697077307782 to 381845362496843923 Data columns (total 7 columns): # Column Dtype --- ------ ----- 0 Date datetime64[ns] 1 Site object 2 Value float64 3 Variable object 4 Unit object 5 Status int64 6 Authority object dtypes: datetime64[ns](1), float64(1), int64(1), object(4) memory usage: 1.0+ GB
for v in Variables:
dftmp=dfRaw[(dfRaw['Variable']==v)&(dfRaw['Status']==0)]
print(v,dftmp['Value'].min(),dftmp['Value'].mean(),dftmp['Value'].max())
CO -982.333 1.0764385783889987 1719.336 NO2 -982.017 14.772773723825052 1999998.0 NO -982.328 14.219048855361276 1999998.0 RH -981.7 54.86097083665307 250044.72 WindDir 0.0 165.24235932311535 360.0 PM10 -15112.66 61.25016485564989 666667.126 PM2.5 -1554.124 27.096234567072788 399999.6 SolarRadiation -982.38 195.0294255837433 250333.7 O3 -981.833 29.620464432283605 666733.333 WindSpeed -982.35 4.540857894978094 250001.62 Temperature -982.17 18.268934927544212 166697.0 NOx -981.917 24.317960393625192 1999998.0 SO2 -66305.293 8.334168928255243 666673.833 WSpeed -0.05 1.8531684830025856 114.73 SRad -1.05 206.8467777158903 1655.32 WDir 0.0 149.64005439560265 360.0 Temp -752.0 22.744673567963176 1434.3
Station information from Google Sheets
Stations
| Station_SAAQIS_Name | Station_Name | Type | Owner | Location | Province | District | Latitude | Longitude | Height | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Algoa park Clinic | Algoa Park Clinic | Residential - Medium/Upper income | Nelson Mandela Metropolitan | Algoa Park residential area | Eastern Cape | Nelson Mandela Bay Metro | -33.901361 | 25.564167 | 0 |
| 1 | Mobile | Mobile | Residential - Low Income | Buffalo City Metropolitan | Gompo Clinic, East London | Eastern Cape | Buffalo City Metro | -33.014944 | 27.849248 | 0 |
| 2 | Motherwell | Motherwell | Residential - Low Income | Coega Development Corporation | Ikamvelihle township | Eastern Cape | Nelson Mandela Bay Metro | -33.795488 | 25.616957 | 0 |
| 3 | Motherwell Day Hospital | Motherwell Day Hospital | Residential - Medium/Upper Income | Nelson Mandela Metropolitan | Motherwell Day Hospital | Eastern Cape | Nelson Mandela Bay Metro | -33.811066 | 25.597658 | 54 |
| 4 | Saltworks - NAQI | Saltworks - NAQI | Traffic - Street Canyon | Coega Development Corporation | Coega Special Economic Zone (SEZ) | Eastern Cape | Nelson Mandela Bay Metro | -33.763778 | 25.683428 | 46 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183 | City Hall | City Hall | Traffic - Roadside | City of Cape Town | Cape Town city hall (CBD), Darling Street | WesternCape | Cape Town | -33.9253 | 18.4238393 | 0 |
| 184 | Drill Hall | Cape Town | -33.9259 | 18.4246681 | ||||||
| 185 | Ferndale | eThekwini | -29.7779 | 30.22211 | ||||||
| 186 | Jacobs Air Monitoring | eThekwini | -29.9317 | 30.976995 | ||||||
| 187 | Killarney | Cape Town | -33.8349 | 18.5274851 |
188 rows × 10 columns
Rename stations with data from Google Sheets
# Rename stations
for idx,row in Stations[Stations['Station_SAAQIS_Name']!=Stations['Station_Name']][['Station_SAAQIS_Name','Station_Name']].iterrows():
dfRaw['Site']=dfRaw['Site'].replace(row['Station_SAAQIS_Name'],row['Station_Name'])
print(row['Station_Name'])
Algoa Park Clinic Mobile Walmer - NAQI East London AJ_Jacobs Bayswater_Clinic Eco_Park Kagisanong Pelonomi Zamdela_NWU Zamdela Alexandra Bedfordview Bodibeng Buccleugh Diepkloof Etwatwa Germiston Ivory_park Jabavu Leondale_City Mogale_City NWU_Vaal Olievenhoutbosch Olifantsfontein Orange_Farm PTA_West Rosslyn Springs-new Springs Tswane_Market Vanderbijlpark eNseleni-RBCAA RBCAA_Airport City Hall - Durban - NAQI Ganges New Castle Pietermaritzburg -KZN Pietermaritzburg airport - ORIBI Settlers - NAQI Warwick Wentworth Resevoir Lephalale Phalaborwa PMC_Station_2 PMC_Station_6 PMC_Station_9 Chicken_Farm Club Delmas Elandsfontein eMalahleni eMalahleni_Province Embalenhle_North Embalenhle_South Ermelo Ezamokuhle_2 Grootvlei_Eskom Grootvlei_Sasol Hendrina Middelburg_Province Middelburg Standerton Mafikeng Marikana Welgegund Xanadu Foreshore Goodwood Khayelitsha Saldana Bay Table view Veldedrif Wallacedene Worcester Atlantis Bellville South - NAQI
#dfRaw.to_csv(Path(HomeDir / 'SAAQIS_raw.csv'))
dfRaw=pd.read_csv(Path(HomeDir / 'SAAQIS_raw.csv'), index_col=0)
dfRaw['Date']=pd.to_datetime(dfRaw['Date'])
dfRaw.columns
Index(['Date', 'Site', 'Value', 'Variable', 'Unit', 'Status', 'Authority'], dtype='object')
def extractStation(df,Var,Site="",Authority="",Status=100):
df=df[(df['Variable']==Var)&(df['Status']<=Status)]
if len(Site) > 0:
df=df[(df['Site']==Site)]
if len(Authority) > 0:
df=df[(df['Authority']==Authority)]
df=df.reset_index()
df=df.set_index('Date')
df=df.resample('1H').first()
return(df)
def writeStation(dfRaw,df):
df=df.dropna(subset=['index'])
df=df.reset_index()
df=df.set_index((df.Site + df.Variable + df.Unit + df.Date.astype(str) + df.Value.astype(str)).apply(hash))
print("Setting ",len(df[df['Status']>0]),'status')
dfRaw.loc[df[df['Status']>0].index, 'Status']=df[df['Status']>0]['Status']
#return(dfRaw)
def stuckValue(df,Var='PM2.5 (ug/m3)',diff=0.1,stuckNumber=3):
print("In stuckValue:",Var,diff,stuckNumber)
df['diff']=(abs(df[Var]-df[Var].shift())>diff).astype(int)
df['c'] = (df['diff'] != 0).cumsum()
df['a'] = (df['c'] == 0).astype(int)
df['streak'] = df.groupby( 'c' ).cumcount() + df['a']
df.loc[df['streak'] > stuckNumber,'Status'] = 2
df.drop(['streak', 'a','c','diff'], axis=1, inplace=True)
df=df.dropna(subset=['index'])
return(df[df['Status']==2])
months={1:'January',
2:'February',
3:'March',
4:'April',
5:'May',
6:'June',
7:'July',
8:'August',
9:'September',
10:'October',
11:'November',
12:'December'}
output=[["Agent", "Period", "Standard", "Exceeds", "Average", "Interval", "N", "N exceeds", "Std Dev", "Median", "25%", "75%", "99%"]]
Vars={"PM10 (ug/m3)",
"PM2.5 (ug/m3)",
"SO2 (ppb)",
"NO2 (ppb)",
"H2S (ppb)",
"CO (ppm)",
"O3 (ppb)"}
DataAvailable={'1D':1152,
'A':367920,
'8H':384,
'4H':192,
'1H':48}
def is_leap_year(year):
return int(year % 4 == 0 and (year % 100 != 0 or year % 400 == 0))
def numdays(month,year):
days={1:31,2:28,3:31,4:30,5:31,6:30,7:31,8:31,9:30,10:31,11:30,12:31}
if month == 2:
return(days[month]+is_leap_year(year))
else:
return(days[month])
def confidence_interval(data, confidence=0.99):
a = 1.0*np.array(data)
n = len(a)
m, se = np.mean(a), scipy.stats.sem(a)
h = se * sp.stats.t._ppf((1+confidence)/2., n-1)
return( m-h, m+h)
CStations=dfRaw['Site'].unique()
Variables=dfRaw['Variable'].unique()
Authority=dfRaw['Authority'].unique()
CStations.sort()
(CStations, Variables, Authority)
(array(['Alexandra', 'Balfour', 'Bedfordview', 'Bodibeng', 'Boitekong',
'Booysens', 'Bosjesspruit', 'Buccleugh', 'Camden', 'Capricorn',
'Chicken_Farm', 'Club', 'Damonsville', 'Delmas', 'Diepkloof',
'Diepsloot', 'Dilokong', 'Ekandustria', 'Elandsfontein',
'Embalenhle', 'Embalenhle_North', 'Embalenhle_South', 'Ermelo',
'Etwatwa', 'Ezamokuhle', 'Ezamokuhle_2', 'Grootdraaidam',
'Grootvlei_Eskom', 'Grootvlei_Sasol', 'Hammanskraal', 'Hendrina',
'Ivory_park', 'Jabavu', 'Jouberton', 'KOMATI', 'Kanana', 'Kendal',
'Khuma', 'Kliprivier', 'Kriel Village', 'Kwazamokuhle', 'Lebohang',
'Leondale_City', 'Lephalale', 'Lichtenburg', 'Mafikeng', 'Majuba',
'Mamelodi', 'Marapong', 'Marikana', 'Masakhane', 'Medupi',
'Meyerton', 'Mhluzi', 'Middelburg', 'Middelburg_Province',
'Mogale_City', 'Mokopane', 'NWU_Vaal', 'Olievenhoutbosch',
'Olifantsfontein', 'Orange_Farm', 'PMC_Station_2', 'PMC_Station_6',
'PMC_Station_9', 'PTA_West', 'Phalaborwa', 'Phokeng', 'Phola',
'Randfontein', 'Randwater', 'Rosslyn', 'Sebokeng', 'Secunda',
'Sharpeville', 'Silobela', 'Sivukile', 'Springs', 'Springs-new',
'Standerton', 'Steelpoort', 'Tembisa', 'Thabazimbi', 'Thbelihle',
'Thokoza-NAQI', 'Three Rivers', 'Tlhabane', 'Tsakane',
'Tswane_Market', 'Vanderbijlpark', 'Wattville', 'Welgegund',
'Xanadu', 'eMalahleni', 'eMalahleni_Province'], dtype=object),
array(['CO', 'NO2', 'NO', 'RH', 'WindDir', 'PM10', 'PM2.5',
'SolarRadiation', 'O3', 'WindSpeed', 'Temperature', 'NOx', 'SO2',
'WSpeed', 'SRad', 'WDir', 'Temp'], dtype=object),
array(['GJA', 'MP', 'LP', 'NW'], dtype=object))
dfRaw
| Date | Site | Value | Variable | Unit | Status | Authority | |
|---|---|---|---|---|---|---|---|
| 1852255697077307782 | 2017-10-01 01:00:00 | Diepkloof | 0.124 | CO | ppm | 0 | GJA |
| 7375218027412914032 | 2017-10-01 01:00:00 | Kliprivier | 0.906 | CO | ppm | 0 | GJA |
| 3852028028839148879 | 2017-10-01 01:00:00 | Sebokeng | 0.486 | CO | ppm | 0 | GJA |
| 6878777352520101247 | 2017-10-01 01:00:00 | Sharpeville | 0.579 | CO | ppm | 0 | GJA |
| -6091580437242758771 | 2017-10-01 01:00:00 | Three Rivers | 0.627 | CO | ppm | 0 | GJA |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1406706662450839838 | 2018-09-30 22:00:00 | Phokeng | 30.330 | PM10 | ppm | 0 | NW |
| -223789239276589325 | 2018-09-30 23:00:00 | Khuma | 24.980 | PM10 | ppm | 0 | NW |
| -4668978282257229284 | 2018-09-30 23:00:00 | Phokeng | 35.258 | PM10 | ppm | 0 | NW |
| -7251294827660167585 | 2018-10-01 00:00:00 | Khuma | 25.230 | PM10 | ppm | 0 | NW |
| 381845362496843923 | 2018-10-01 00:00:00 | Phokeng | 26.856 | PM10 | ppm | 0 | NW |
17133756 rows × 7 columns
# Apply limits to all variables
varlimits={"NO":[0.1,80],
"NO2":[0.5,250],
"NOx":[0.5,250],
"O3":[1,150],
"PM10":[5,2000],
"PM2.5":[5,1000],
"SO2":[2,500],
"CO":[0.01,20],
'WindDir':[0,360],
'SolarRadiation':[-200,2000],
'WindSpeed':[0,50],
'Temperature':[-20,50],
'RH':[0,100]
}
# Set status of out of range values
for v in Variables:
if v in varlimits.keys():
print(v,varlimits[v][0],varlimits[v][1])
df=dfRaw[dfRaw['Variable']==v]
dfRaw.loc[df[(df['Value'] < varlimits[v][0])].index,'Status']=1
dfRaw.loc[df[(df['Value'] > varlimits[v][1])].index,'Status']=1
CO 0.01 20 NO2 0.5 250 NO 0.1 80 RH 0 100 WindDir 0 360 PM10 5 2000 PM2.5 5 1000 SolarRadiation -200 2000 O3 1 150 WindSpeed 0 50 Temperature -20 50 NOx 0.5 250 SO2 2 500
for v in Variables:
dftmp=dfRaw[(dfRaw['Variable']==v)&(dfRaw['Status']==0)]
print(v,dftmp['Value'].min(),dftmp['Value'].mean(),dftmp['Value'].max())
CO 0.01 0.6735573622608952 20.0 NO2 0.5 11.377116660325004 249.728 NO 0.1 6.591517200705555 80.0 RH 0.0 51.87391638091305 100.0 WindDir 0.0 165.24235932311535 360.0 PM10 5.0 55.280370370146905 1998.0 PM2.5 5.0 26.919552548950502 1000.0 SolarRadiation -198.05 194.00219461190918 1431.98 O3 1.0 27.98816143025761 149.904 WindSpeed 0.0 2.3120908939198626 49.72 Temperature -18.45 17.914922434248794 49.86 NOx 0.5 19.194298378977795 250.0 SO2 2.0 11.355285713283404 500.0 WSpeed -0.05 1.8531684830025856 114.73 SRad -1.05 206.8467777158903 1655.32 WDir 0.0 149.64005439560265 360.0 Temp -752.0 22.744673567963176 1434.3
def stuckValue(df,Var='PM2.5 (ug/m3)',diff=0.1,stuckNumber=3):
print("In stuckValue:",Var,diff,stuckNumber)
df['diff']=(abs(df[Var]-df[Var].shift())>diff).astype(int)
df['c'] = (df['diff'] != 0).cumsum()
df['a'] = (df['c'] == 0).astype(int)
df['streak'] = df.groupby( 'c' ).cumcount() + df['a']
df.loc[df['streak'] > stuckNumber,'Status'] = 2
df.drop(['streak', 'a','c','diff'], axis=1, inplace=True)
df=df.dropna(subset=['index'])
return(df[df['Status']==2])
for s in Stations:
for v in Variables:
print(s,v)
df=extractStation(dfRaw,v,s,Status=0)
idx=stuckValue(df,Var='Value',diff=0.1,stuckNumber=2)
idx=idx.reset_index()
idx['index']=(idx.Site + idx.Variable + idx.Unit + idx.Value.astype(str)).apply(hash)
#print(idx.head())
print(idx['index'].values)
#print(dfRaw.loc[idx['index'].values])
Station_SAAQIS_Name CO In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name NO2 In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name NO In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name RH In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name WindDir In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name PM10 In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name PM2.5 In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name SolarRadiation In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name O3 In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name WindSpeed In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name Temperature In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name NOx In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name SO2 In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name WSpeed In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name SRad In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name WDir In stuckValue: Value 0.1 2 [] Station_SAAQIS_Name Temp In stuckValue: Value 0.1 2 [] Station_Name CO In stuckValue: Value 0.1 2 [] Station_Name NO2 In stuckValue: Value 0.1 2 [] Station_Name NO In stuckValue: Value 0.1 2 [] Station_Name RH In stuckValue: Value 0.1 2 [] Station_Name WindDir In stuckValue: Value 0.1 2 [] Station_Name PM10 In stuckValue: Value 0.1 2 [] Station_Name PM2.5 In stuckValue: Value 0.1 2 [] Station_Name SolarRadiation In stuckValue: Value 0.1 2 [] Station_Name O3 In stuckValue: Value 0.1 2 [] Station_Name WindSpeed In stuckValue: Value 0.1 2 [] Station_Name Temperature In stuckValue: Value 0.1 2 [] Station_Name NOx In stuckValue: Value 0.1 2 [] Station_Name SO2 In stuckValue: Value 0.1 2 [] Station_Name WSpeed In stuckValue: Value 0.1 2 [] Station_Name SRad In stuckValue: Value 0.1 2 [] Station_Name WDir In stuckValue: Value 0.1 2 [] Station_Name Temp In stuckValue: Value 0.1 2 [] Type CO In stuckValue: Value 0.1 2 [] Type NO2 In stuckValue: Value 0.1 2 [] Type NO In stuckValue: Value 0.1 2 [] Type RH In stuckValue: Value 0.1 2 [] Type WindDir In stuckValue: Value 0.1 2 [] Type PM10 In stuckValue: Value 0.1 2 [] Type PM2.5 In stuckValue: Value 0.1 2 [] Type SolarRadiation In stuckValue: Value 0.1 2 [] Type O3 In stuckValue: Value 0.1 2 [] Type WindSpeed In stuckValue: Value 0.1 2 [] Type Temperature In stuckValue: Value 0.1 2 [] Type NOx In stuckValue: Value 0.1 2 [] Type SO2 In stuckValue: Value 0.1 2 [] Type WSpeed In stuckValue: Value 0.1 2 [] Type SRad In stuckValue: Value 0.1 2 [] Type WDir In stuckValue: Value 0.1 2 [] Type Temp In stuckValue: Value 0.1 2 [] Owner CO In stuckValue: Value 0.1 2 [] Owner NO2 In stuckValue: Value 0.1 2 [] Owner NO In stuckValue: Value 0.1 2 [] Owner RH In stuckValue: Value 0.1 2 [] Owner WindDir In stuckValue: Value 0.1 2 [] Owner PM10 In stuckValue: Value 0.1 2 [] Owner PM2.5 In stuckValue: Value 0.1 2 [] Owner SolarRadiation In stuckValue: Value 0.1 2 [] Owner O3 In stuckValue: Value 0.1 2 [] Owner WindSpeed In stuckValue: Value 0.1 2 [] Owner Temperature In stuckValue: Value 0.1 2 [] Owner NOx In stuckValue: Value 0.1 2 [] Owner SO2 In stuckValue: Value 0.1 2 [] Owner WSpeed In stuckValue: Value 0.1 2 [] Owner SRad In stuckValue: Value 0.1 2 [] Owner WDir In stuckValue: Value 0.1 2 [] Owner Temp In stuckValue: Value 0.1 2 [] Location CO In stuckValue: Value 0.1 2 [] Location NO2 In stuckValue: Value 0.1 2 [] Location NO In stuckValue: Value 0.1 2 [] Location RH In stuckValue: Value 0.1 2 [] Location WindDir In stuckValue: Value 0.1 2 [] Location PM10 In stuckValue: Value 0.1 2 [] Location PM2.5 In stuckValue: Value 0.1 2 [] Location SolarRadiation In stuckValue: Value 0.1 2 [] Location O3 In stuckValue: Value 0.1 2 [] Location WindSpeed In stuckValue: Value 0.1 2 [] Location Temperature In stuckValue: Value 0.1 2 [] Location NOx In stuckValue: Value 0.1 2 [] Location SO2 In stuckValue: Value 0.1 2 [] Location WSpeed In stuckValue: Value 0.1 2 [] Location SRad In stuckValue: Value 0.1 2 [] Location WDir In stuckValue: Value 0.1 2 [] Location Temp In stuckValue: Value 0.1 2 [] Province CO In stuckValue: Value 0.1 2 [] Province NO2 In stuckValue: Value 0.1 2 [] Province NO In stuckValue: Value 0.1 2 [] Province RH In stuckValue: Value 0.1 2 [] Province WindDir In stuckValue: Value 0.1 2 [] Province PM10 In stuckValue: Value 0.1 2 [] Province PM2.5 In stuckValue: Value 0.1 2 [] Province SolarRadiation In stuckValue: Value 0.1 2 [] Province O3 In stuckValue: Value 0.1 2 [] Province WindSpeed In stuckValue: Value 0.1 2 [] Province Temperature In stuckValue: Value 0.1 2 [] Province NOx In stuckValue: Value 0.1 2 [] Province SO2 In stuckValue: Value 0.1 2 [] Province WSpeed In stuckValue: Value 0.1 2 [] Province SRad In stuckValue: Value 0.1 2 [] Province WDir In stuckValue: Value 0.1 2 [] Province Temp In stuckValue: Value 0.1 2 [] District CO In stuckValue: Value 0.1 2 [] District NO2 In stuckValue: Value 0.1 2 [] District NO In stuckValue: Value 0.1 2 [] District RH In stuckValue: Value 0.1 2 [] District WindDir In stuckValue: Value 0.1 2 [] District PM10 In stuckValue: Value 0.1 2 [] District PM2.5 In stuckValue: Value 0.1 2 [] District SolarRadiation In stuckValue: Value 0.1 2 [] District O3 In stuckValue: Value 0.1 2 [] District WindSpeed In stuckValue: Value 0.1 2 [] District Temperature In stuckValue: Value 0.1 2 [] District NOx In stuckValue: Value 0.1 2 [] District SO2 In stuckValue: Value 0.1 2 [] District WSpeed In stuckValue: Value 0.1 2 [] District SRad In stuckValue: Value 0.1 2 [] District WDir In stuckValue: Value 0.1 2 [] District Temp In stuckValue: Value 0.1 2 [] Latitude CO In stuckValue: Value 0.1 2 [] Latitude NO2 In stuckValue: Value 0.1 2 [] Latitude NO In stuckValue: Value 0.1 2 [] Latitude RH In stuckValue: Value 0.1 2 [] Latitude WindDir In stuckValue: Value 0.1 2 [] Latitude PM10 In stuckValue: Value 0.1 2 [] Latitude PM2.5 In stuckValue: Value 0.1 2 [] Latitude SolarRadiation In stuckValue: Value 0.1 2 [] Latitude O3 In stuckValue: Value 0.1 2 [] Latitude WindSpeed In stuckValue: Value 0.1 2 [] Latitude Temperature In stuckValue: Value 0.1 2 [] Latitude NOx In stuckValue: Value 0.1 2 [] Latitude SO2 In stuckValue: Value 0.1 2 [] Latitude WSpeed In stuckValue: Value 0.1 2 [] Latitude SRad In stuckValue: Value 0.1 2 [] Latitude WDir In stuckValue: Value 0.1 2 [] Latitude Temp In stuckValue: Value 0.1 2 [] Longitude CO In stuckValue: Value 0.1 2 [] Longitude NO2 In stuckValue: Value 0.1 2 [] Longitude NO In stuckValue: Value 0.1 2 [] Longitude RH In stuckValue: Value 0.1 2 [] Longitude WindDir In stuckValue: Value 0.1 2 [] Longitude PM10 In stuckValue: Value 0.1 2 [] Longitude PM2.5 In stuckValue: Value 0.1 2 [] Longitude SolarRadiation In stuckValue: Value 0.1 2 [] Longitude O3 In stuckValue: Value 0.1 2 [] Longitude WindSpeed In stuckValue: Value 0.1 2 [] Longitude Temperature In stuckValue: Value 0.1 2 [] Longitude NOx In stuckValue: Value 0.1 2 [] Longitude SO2 In stuckValue: Value 0.1 2 [] Longitude WSpeed In stuckValue: Value 0.1 2 [] Longitude SRad In stuckValue: Value 0.1 2 [] Longitude WDir In stuckValue: Value 0.1 2 [] Longitude Temp In stuckValue: Value 0.1 2 [] Height CO In stuckValue: Value 0.1 2 [] Height NO2 In stuckValue: Value 0.1 2 [] Height NO In stuckValue: Value 0.1 2 [] Height RH In stuckValue: Value 0.1 2 [] Height WindDir In stuckValue: Value 0.1 2 [] Height PM10 In stuckValue: Value 0.1 2 [] Height PM2.5 In stuckValue: Value 0.1 2 [] Height SolarRadiation In stuckValue: Value 0.1 2 [] Height O3 In stuckValue: Value 0.1 2 [] Height WindSpeed In stuckValue: Value 0.1 2 [] Height Temperature In stuckValue: Value 0.1 2 [] Height NOx In stuckValue: Value 0.1 2 [] Height SO2 In stuckValue: Value 0.1 2 [] Height WSpeed In stuckValue: Value 0.1 2 [] Height SRad In stuckValue: Value 0.1 2 [] Height WDir In stuckValue: Value 0.1 2 [] Height Temp In stuckValue: Value 0.1 2 []
Print the monthly average and number of months for which there are data
Threshold=65
Data=[]
for s in CStations:
for v in ['PM10','PM2.5']:
df=extractStation(dfRaw,v,s,Status=0)
# Daily averages
tValue=df['Value'].dropna().resample('1D').mean()
tCount=df['Value'].dropna().resample('1D').count()
tDaily=pd.concat([tValue,tCount],axis=1)
tDaily.columns=['Mean','N']
tDaily.loc[tDaily['N']/24*100<Threshold,'Mean']=np.nan
# Monthly averages
tValue=tDaily['Mean'].dropna().resample('1M').mean()
tCount=tDaily['Mean'].dropna().resample('1M').count()
tMonthly=pd.concat([tValue,tCount],axis=1)
tMonthly.columns=['Mean','N']
tMonthly.loc[tMonthly['N']/tMonthly.index.days_in_month*100<Threshold,'Mean']=np.nan
# Annual average
tValue=tMonthly['Mean'].dropna().resample('1Y').mean()
tCount=tMonthly['Mean'].dropna().resample('1Y').count()
tAnnual=pd.concat([tValue,tCount],axis=1)
tAnnual.columns=['Mean','N']
tAnnual.loc[tAnnual['N']/12*100<Threshold,'Mean']=np.nan
#
if len(tAnnual['Mean'].dropna()) > 0:
Data.append([s,v,tAnnual['Mean'].mean(),tAnnual['Mean'].count()])
print("{},{},{:.1f},{}".format(s,v,tAnnual['Mean'].mean(),tAnnual['Mean'].count()))
else:
tMonthly['Month']=tMonthly.index.month
tMonthly=tMonthly.groupby('Month').mean()[['Mean']]
if tMonthly['Mean'].count()/12*100>Threshold:
print("{},{},{:.1f},{} months".format(s,v,tMonthly['Mean'].mean(),tMonthly['Mean'].count()))
Data.append([s,v,tMonthly['Mean'].mean(),tMonthly['Mean'].count()])
dfAnn=pd.pivot_table(pd.DataFrame(data=Data, columns=['Site','Variable','Average','N']),index='Site',columns='Variable',values=['Average','N'])
dfAnn
Alexandra,PM10,69.0,1 Alexandra,PM2.5,35.9,1 Balfour,PM10,54.2,2 Balfour,PM2.5,22.6,11 months Bedfordview,PM10,49.0,3 Bedfordview,PM2.5,27.6,3 Bodibeng,PM10,69.1,10 months Bodibeng,PM2.5,30.2,1 Booysens,PM10,72.3,10 months Bosjesspruit,PM10,45.2,3 Bosjesspruit,PM2.5,25.5,2 Buccleugh,PM10,41.7,8 months Camden,PM10,33.1,2 Camden,PM2.5,21.9,1 Chicken_Farm,PM10,41.5,4 Club,PM10,39.9,2 Club,PM2.5,25.5,3 Delmas,PM10,90.6,1 Diepkloof,PM10,37.8,5 Diepkloof,PM2.5,24.2,3 Diepsloot,PM10,43.1,2 Elandsfontein,PM10,24.3,1 Elandsfontein,PM2.5,25.9,1 Embalenhle,PM10,44.5,5 Embalenhle,PM2.5,27.9,1 Embalenhle_North,PM10,73.9,11 months Embalenhle_North,PM2.5,36.4,11 months Ermelo,PM10,72.2,12 months Ermelo,PM2.5,28.9,12 months Etwatwa,PM10,98.8,2 Etwatwa,PM2.5,52.3,2 Grootvlei_Eskom,PM10,32.0,1 Grootvlei_Eskom,PM2.5,24.5,10 months Hammanskraal,PM10,63.2,1 Hammanskraal,PM2.5,26.8,1 Hendrina,PM10,35.5,9 months Ivory_park,PM10,49.1,11 months Jabavu,PM10,67.0,4 Jabavu,PM2.5,41.0,3 Jouberton,PM10,53.5,1 Jouberton,PM2.5,18.2,11 months KOMATI,PM10,70.0,1 Kendal,PM10,66.4,10 months Khuma,PM10,71.6,1 Khuma,PM2.5,19.5,10 months Kliprivier,PM10,56.8,3 Kliprivier,PM2.5,38.3,2 Kriel Village,PM10,53.5,1 Kriel Village,PM2.5,26.4,1 Kwazamokuhle,PM10,66.7,1 Lebohang,PM10,55.3,1 Lebohang,PM2.5,28.0,1 Lephalale,PM10,26.0,4 Lephalale,PM2.5,17.3,2 Mafikeng,PM10,47.7,2 Mafikeng,PM2.5,31.0,1 Mamelodi,PM10,234.5,1 Marikana,PM10,124.1,1 Marikana,PM2.5,31.4,1 Medupi,PM10,34.0,3 Medupi,PM2.5,17.8,9 months Mhluzi,PM10,62.6,9 months Mhluzi,PM2.5,39.7,8 months Middelburg,PM10,45.5,2 Middelburg,PM2.5,22.3,11 months Middelburg_Province,PM10,41.8,3 Middelburg_Province,PM2.5,21.0,12 months Mokopane,PM10,58.8,4 Mokopane,PM2.5,20.4,2 NWU_Vaal,PM10,44.2,2 NWU_Vaal,PM2.5,26.2,2 Olievenhoutbosch,PM10,97.2,10 months Olievenhoutbosch,PM2.5,38.7,1 Olifantsfontein,PM10,70.7,2 Olifantsfontein,PM2.5,33.9,2 Orange_Farm,PM10,43.0,11 months PTA_West,PM10,29.6,1 Phalaborwa,PM10,27.7,3 Phalaborwa,PM2.5,15.3,8 months Phokeng,PM10,120.0,9 months Randwater,PM2.5,21.0,11 months Rosslyn,PM10,33.4,2 Rosslyn,PM2.5,25.6,1 Sebokeng,PM10,39.1,1 Sebokeng,PM2.5,31.3,1 Secunda,PM10,54.6,11 months Sharpeville,PM10,57.8,1 Sharpeville,PM2.5,36.6,1 Springs,PM10,39.6,2 Springs,PM2.5,32.4,1 Standerton,PM10,102.3,1 Standerton,PM2.5,28.5,2 Steelpoort,PM2.5,12.5,1 Thabazimbi,PM10,50.7,3 Thabazimbi,PM2.5,29.8,9 months Thokoza-NAQI,PM10,93.9,2 Three Rivers,PM10,61.3,3 Three Rivers,PM2.5,27.5,3 Tswane_Market,PM10,47.1,1 Tswane_Market,PM2.5,35.9,1 Vanderbijlpark,PM10,69.4,4 Vanderbijlpark,PM2.5,30.8,2 Welgegund,PM10,24.0,3 Welgegund,PM2.5,14.5,11 months Xanadu,PM10,58.2,2 Xanadu,PM2.5,43.4,3 eMalahleni,PM10,52.1,12 months eMalahleni,PM2.5,33.9,9 months eMalahleni_Province,PM10,71.1,9 months
| Average | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Alexandra | 69.000843 | 35.930254 | 1.0 | 1.0 |
| Balfour | 54.243009 | 22.592085 | 2.0 | 11.0 |
| Bedfordview | 48.984103 | 27.646535 | 3.0 | 3.0 |
| Bodibeng | 69.062161 | 30.225458 | 10.0 | 1.0 |
| Booysens | 72.324412 | NaN | 10.0 | NaN |
| ... | ... | ... | ... | ... |
| Vanderbijlpark | 69.430570 | 30.752688 | 4.0 | 2.0 |
| Welgegund | 23.989938 | 14.479458 | 3.0 | 11.0 |
| Xanadu | 58.212416 | 43.434399 | 2.0 | 3.0 |
| eMalahleni | 52.133645 | 33.940644 | 12.0 | 9.0 |
| eMalahleni_Province | 71.121457 | NaN | 9.0 | NaN |
64 rows × 4 columns
dfAnn=pd.pivot_table(pd.DataFrame(data=Data, columns=['Site','Variable','Average','N']),index='Site',columns='Variable',values=['Average','N'])
dfAnn
| Average | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Alexandra | 69.000843 | 35.930254 | 1.0 | 1.0 |
| Balfour | 54.243009 | 22.592085 | 2.0 | 11.0 |
| Bedfordview | 48.984103 | 27.646535 | 3.0 | 3.0 |
| Bodibeng | 69.062161 | 30.225458 | 10.0 | 1.0 |
| Booysens | 72.324412 | NaN | 10.0 | NaN |
| ... | ... | ... | ... | ... |
| Vanderbijlpark | 69.430570 | 30.752688 | 4.0 | 2.0 |
| Welgegund | 23.989938 | 14.479458 | 3.0 | 11.0 |
| Xanadu | 58.212416 | 43.434399 | 2.0 | 3.0 |
| eMalahleni | 52.133645 | 33.940644 | 12.0 | 9.0 |
| eMalahleni_Province | 71.121457 | NaN | 9.0 | NaN |
64 rows × 4 columns
dfAnn.sort_values(('Average','PM10'), ascending=False)
| Average | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Mamelodi | 234.515924 | NaN | 1.0 | NaN |
| Marikana | 124.053699 | 31.401130 | 1.0 | 1.0 |
| Phokeng | 120.030468 | NaN | 9.0 | NaN |
| Standerton | 102.325200 | 28.518791 | 1.0 | 2.0 |
| Etwatwa | 98.780250 | 52.327086 | 2.0 | 2.0 |
| ... | ... | ... | ... | ... |
| Lephalale | 26.000416 | 17.289419 | 4.0 | 2.0 |
| Elandsfontein | 24.299932 | 25.889746 | 1.0 | 1.0 |
| Welgegund | 23.989938 | 14.479458 | 3.0 | 11.0 |
| Randwater | NaN | 21.012719 | NaN | 11.0 |
| Steelpoort | NaN | 12.492006 | NaN | 1.0 |
64 rows × 4 columns
%matplotlib inline
def SummaryTable(df,month):
days={1:31,2:28,3:31,4:30,5:31,6:30,7:31,8:31,9:30,10:31,11:30,12:31}
ndur={'1D':1,'8H':3,'1H':24}
ndays=days[month]
pols=[["PM10",'1D',75,4],
["PM2.5",'1D',40,4],
["SO2","1H",134,88],
["SO2","1D",48,4],
["NO2","1H",106,88],
["CO","1H",26,88],
["CO","8H",8.7,11],
["O3","8H",61,11]]
output=[["Agent", "Period", "Standard", "Exceeds", "Average", "Interval", "N", "N exceeds", "Std Dev", "Median", "25%", "75%", "99%","DataAvailable (%)"]]
for p in pols:
pname=p[0]
pdur=p[1]
paqs=p[2]
pexc=p[3]
#print("Doing",pname,pdur,paqs,pexc)
if pdur == "A":
t1=df[[pname]].dropna().resample('1M').mean()
t2=df[[pname]].dropna().resample('1M').count()
t2.columns=['N']
if t2.sum()[0] < 34560:
N=0
Nexc=np.nan
else:
N=1
if t1.mean()[0] > paqs:
Nexc=1
else:
Nexc=0
else:
if pdur == '8H':
t1=df[[pname]].resample('1H').mean().rolling(window=8,min_periods=4).mean()
t2=df[[pname]].dropna().resample('1H').mean().rolling(window=8,min_periods=4).count()
else:
t1=df[[pname]].dropna().resample(pdur).mean()
t2=df[[pname]].dropna().resample(pdur).count()
# t1.ix[df[pname]<10,pname]=np.nan
t2.columns=['N']
t=pd.concat([t1,t2],axis=1)
t.loc[t['N']<DataAvailable[pdur],pname]=np.nan
N=len(t[pname].dropna())
Nexc=len(t[t[pname]>paqs])
if N>4:
#print(t[pname].dropna().describe())
if N<30:
CIs=confidence_interval(t[pname].dropna().values)
#bootstrap.ci(data=t[pname].dropna(),statfunction=sp.mean,alpha=0.05)
elif N>5:
CIs=confidence_interval(t[pname].dropna().values)
else:
CIs=["",""]
Pave=t[pname].mean()
Pstd=t[pname].std()
P50=t[pname].quantile(0.5)
P25=t[pname].quantile(0.25)
P75=t[pname].quantile(0.75)
P99=t[pname].quantile(0.99)
else:
CIs=[np.nan,np.nan]
Pave=t[pname].mean()
Pstd=np.nan
P50=np.nan
P25=np.nan
P75=np.nan
P99=np.nan
#print(t[pname].mean(),CIs,N)
DA=N/(ndays*ndur[pdur])*100
output.append([pname,pdur,paqs,pexc,"%.0f" % (Pave),"%.0f-%.0f" % (CIs[0],CIs[1]),N,Nexc,"%.0f" % (Pstd),"%.0f" % (P50),"%.0f" % (P25),"%.0f" % (P75),"%.0f" % (P99),"%.0f" % (DA)])
a=np.array(output)
df2=pd.DataFrame(data=a[1:,1:],index=a[1:,0],columns=a[0,1:])
df2=df2.replace("nan-nan", "")
df2=df2.replace("nan", "")
#print(t)
return(df2)
def DataAvailableTable(df):
#output=[["Parameter", "Data Available (mins)", "DataAvailable (%)"]]
output={}
TotalHours=len(df['Value'])
HoursData=len(df['Value'].dropna())
output['Value']=["{:.0f}".format(HoursData),"{:.0f}".format(HoursData/TotalHours*100)]
return(output)
def extractStationVars(df,Site="",Vars=[],Status=0):
df=df[(df['Status']<=Status)&(df['Site']==Site)]
df=pd.pivot_table(df,index=['Date'],columns=['Variable'],values=['Value'])
df.index=pd.to_datetime(df.index)
df.columns=df.columns.droplevel(0)
if len(Vars) > 0:
Vars=[v for v in Vars if v in df.columns]
df=df[Vars]
df=df.resample('1H').first()
return(df)
dftmp=extractStationVars(dfRaw,Site='Jabavu')
#dftmp.columns=dftmp.columns.droplevel(0)
%matplotlib inline
df2=dftmp[['PM10','PM2.5']].stack().reset_index()
df2.columns=['Date','Variable','Concentration (ug/m3)']
df2.index = df2['Date']
#define figure size
sns.set(rc={"figure.figsize":(10, 10)}) #width=8, height=4
sns.boxplot(x=df2.index.month,y='Concentration (ug/m3)',hue='Variable',data=df2)
<AxesSubplot:xlabel='Date', ylabel='Concentration (ug/m3)'>
p10=lambda x: np.percentile(x[~np.isnan(x)],10)
p10.__name__='p10'
p25=lambda x: np.percentile(x[~np.isnan(x)],25)
p25.__name__='p25'
p75=lambda x: np.percentile(x[~np.isnan(x)],75)
p75.__name__='p75'
p90=lambda x: np.percentile(x[~np.isnan(x)],90)
p90.__name__='p90'
df3=pd.pivot_table(dftmp,index=dftmp.index.month,columns=dftmp.index.hour, values=['PM2.5'],aggfunc=[p10,p25,p75,p90]).stack()
df3
| p10 | p25 | p75 | p90 | ||
|---|---|---|---|---|---|
| Variable | PM2.5 | PM2.5 | PM2.5 | PM2.5 | |
| Date | Date | ||||
| 1 | 0 | 8 | 10 | 22 | 41 |
| 1 | 7 | 9 | 21 | 41 | |
| 2 | 6 | 9 | 21 | 33 | |
| 3 | 7 | 9 | 20 | 30 | |
| 4 | 7 | 9 | 20 | 26 | |
| ... | ... | ... | ... | ... | ... |
| 12 | 19 | 7 | 9 | 20 | 28 |
| 20 | 9 | 12 | 23 | 30 | |
| 21 | 11 | 13 | 28 | 39 | |
| 22 | 9 | 13 | 27 | 43 | |
| 23 | 8 | 11 | 25 | 40 |
288 rows × 4 columns
df3.loc[1,0]['p10'].values[0]
7.7835
getAQClimate(dftmp,var="PM2.5",start="2021-03-15",end="2021-03-16")
| Variable | PM2.5 | PM2.5p10 | PM2.5p90 | PM2.5p25 | PM2.5p75 |
|---|---|---|---|---|---|
| Date | |||||
| 2021-03-15 00:00:00 | 17 | 10 | 69 | 17 | 50 |
| 2021-03-15 01:00:00 | 22 | 9 | 57 | 15 | 43 |
| 2021-03-15 02:00:00 | 11 | 8 | 47 | 12 | 39 |
| 2021-03-15 03:00:00 | 11 | 8 | 41 | 11 | 34 |
| 2021-03-15 04:00:00 | 9 | 9 | 43 | 12 | 29 |
| 2021-03-15 05:00:00 | 8 | 8 | 38 | 13 | 27 |
| 2021-03-15 06:00:00 | 7 | 9 | 37 | 13 | 26 |
| 2021-03-15 07:00:00 | 11 | 10 | 36 | 13 | 28 |
| 2021-03-15 08:00:00 | 10 | 8 | 43 | 13 | 30 |
| 2021-03-15 09:00:00 | 8 | 8 | 28 | 10 | 22 |
| 2021-03-15 10:00:00 | 10 | 7 | 23 | 10 | 18 |
| 2021-03-15 11:00:00 | 9 | 7 | 21 | 9 | 17 |
| 2021-03-15 12:00:00 | 8 | 7 | 24 | 9 | 17 |
| 2021-03-15 13:00:00 | 8 | 7 | 25 | 10 | 17 |
| 2021-03-15 14:00:00 | 8 | 7 | 24 | 9 | 18 |
| 2021-03-15 15:00:00 | 8 | 6 | 24 | 10 | 18 |
| 2021-03-15 16:00:00 | 8 | 7 | 27 | 9 | 19 |
| 2021-03-15 17:00:00 | 16 | 6 | 26 | 9 | 19 |
| 2021-03-15 18:00:00 | 14 | 8 | 26 | 11 | 20 |
| 2021-03-15 19:00:00 | 19 | 11 | 35 | 13 | 26 |
| 2021-03-15 20:00:00 | 73 | 14 | 53 | 19 | 38 |
| 2021-03-15 21:00:00 | 91 | 11 | 62 | 22 | 44 |
| 2021-03-15 22:00:00 | 69 | 13 | 71 | 20 | 45 |
| 2021-03-15 23:00:00 | 30 | 10 | 67 | 19 | 54 |
| 2021-03-16 00:00:00 | 44 | 10 | 69 | 17 | 50 |
| 2021-03-16 01:00:00 | 30 | 9 | 57 | 15 | 43 |
| 2021-03-16 02:00:00 | 35 | 8 | 47 | 12 | 39 |
| 2021-03-16 03:00:00 | 37 | 8 | 41 | 11 | 34 |
| 2021-03-16 04:00:00 | 35 | 9 | 43 | 12 | 29 |
| 2021-03-16 05:00:00 | 34 | 8 | 38 | 13 | 27 |
| 2021-03-16 06:00:00 | 30 | 9 | 37 | 13 | 26 |
| 2021-03-16 07:00:00 | 31 | 10 | 36 | 13 | 28 |
| 2021-03-16 08:00:00 | 24 | 8 | 43 | 13 | 30 |
| 2021-03-16 09:00:00 | 15 | 8 | 28 | 10 | 22 |
| 2021-03-16 10:00:00 | 18 | 7 | 23 | 10 | 18 |
| 2021-03-16 11:00:00 | 16 | 7 | 21 | 9 | 17 |
| 2021-03-16 12:00:00 | 19 | 7 | 24 | 9 | 17 |
| 2021-03-16 13:00:00 | 20 | 7 | 25 | 10 | 17 |
| 2021-03-16 14:00:00 | 22 | 7 | 24 | 9 | 18 |
| 2021-03-16 15:00:00 | 20 | 6 | 24 | 10 | 18 |
| 2021-03-16 16:00:00 | 23 | 7 | 27 | 9 | 19 |
| 2021-03-16 17:00:00 | 23 | 6 | 26 | 9 | 19 |
| 2021-03-16 18:00:00 | 25 | 8 | 26 | 11 | 20 |
| 2021-03-16 19:00:00 | 28 | 11 | 35 | 13 | 26 |
| 2021-03-16 20:00:00 | 82 | 14 | 53 | 19 | 38 |
| 2021-03-16 21:00:00 | 89 | 11 | 62 | 22 | 44 |
| 2021-03-16 22:00:00 | 91 | 13 | 71 | 20 | 45 |
| 2021-03-16 23:00:00 | 79 | 10 | 67 | 19 | 54 |
def cleanplot(ax):
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.spines["right"].set_visible(False)
def getAQClimate(df,var="",start="",end=""):
p10=lambda x: np.percentile(x[~np.isnan(x)],10)
p10.__name__='p10'
p25=lambda x: np.percentile(x[~np.isnan(x)],25)
p25.__name__='p25'
p75=lambda x: np.percentile(x[~np.isnan(x)],75)
p75.__name__='p75'
p90=lambda x: np.percentile(x[~np.isnan(x)],90)
p90.__name__='p90'
df2=pd.pivot_table(dftmp,index=dftmp.index.month,columns=dftmp.index.hour, values=[var],aggfunc=[p10,p25,p75,p90]).stack()
df1=df[start:end][[var]]
df1[var+'p10']=np.nan
df1[var+'p90']=np.nan
df1[var+'p25']=np.nan
df1[var+'p75']=np.nan
for idx,row in df1.iterrows():
df1.loc[idx,var+'p10']=df2.loc[idx.month,idx.hour]['p10'].values[0]
df1.loc[idx,var+'p90']=df2.loc[idx.month,idx.hour]['p90'].values[0]
df1.loc[idx,var+'p25']=df2.loc[idx.month,idx.hour]['p25'].values[0]
df1.loc[idx,var+'p75']=df2.loc[idx.month,idx.hour]['p75'].values[0]
return(df1)
def plotTimeSeriesPeriod(df,title="", var="", start="",end="",climate=False):
# Size of 1x graph y-axis
Y=2
mpl.style.use('default')
# Set period
if len(start)>0 and len(end)>0:
df2=df[start:end]
elif len(start)>0 and len(end)==0:
df2=df[start:]
elif len(start)==0 and len(end)>0:
df2=df[:end]
else:
df2=df
#df=df.resample('1D').mean()
# Count the number of plots
n=0
plotpm10=False
plotpm25=False
plotso2=False
plotno=False
plotno2=False
ploto3=False
plotwinds=True
if 'PM10' in df.columns and len(df2['PM10'].dropna()) > 0:
n=n+1
plotpm10=True
if 'PM2.5' in df.columns and len(df2['PM2.5'].dropna()) > 0:
n=n+1
plotpm25=True
if 'SO2' in df.columns and len(df2['SO2'].dropna()) > 0:
n=n+1
plotso2=True
ysize=n*Y
f,ax = plt.subplots(n,sharex=True, figsize=[8,ysize]) # fig.add_subplot(1,1,1)
i=0
if plotpm10:
df3=getAQClimate(df,var="PM10",start=start,end=end)
ax[i].fill_between(df3.index, df3['PM10p10'], df3['PM10p90'], alpha=.25, facecolor='tab:blue', label='10-90%')
ax[i].fill_between(df3.index, df3['PM10p25'], df3['PM10p75'], alpha=.5, facecolor='tab:blue', label='25-75%')
ax[i].plot(df2.index,df2['PM10'], color='tab:orange', label='PM10 (ug/m3)')
cleanplot(ax[i])
ax[i].set_ylabel("ug/m3")
ax[i].legend()
i=i+1
if plotpm25:
df3=getAQClimate(df,var="PM2.5",start=start,end=end)
ax[i].fill_between(df3.index, df3['PM2.5p10'], df3['PM2.5p90'], alpha=.25, facecolor='tab:blue')
ax[i].fill_between(df3.index, df3['PM2.5p25'], df3['PM2.5p75'], alpha=.5, facecolor='tab:blue')
ax[i].plot(df2.index,df2['PM2.5'], color='tab:orange', label='PM2.5 (ug/m3)')
cleanplot(ax[i])
ax[i].set_ylabel("ug/m3")
ax[i].legend()
i=i+1
if plotso2:
df3=getAQClimate(df,var="SO2",start=start,end=end)
ax[i].fill_between(df3.index, df3['SO2p10'], df3['SO2p90'], alpha=.25, facecolor='tab:blue')
ax[i].fill_between(df3.index, df3['SO2p25'], df3['SO2p75'], alpha=.5, facecolor='tab:blue')
ax[i].plot(df2.index,df2['SO2'], color='tab:orange', label='SO2 (ppb)')
cleanplot(ax[i])
ax[i].legend()
ax[i].set_ylabel("ppb")
i=i+1
i=i-1
# Hide the spines of the figure
ax[i].spines["top"].set_visible(False)
ax[i].spines["right"].set_visible(False)
plt.xticks(fontsize=9)
# Ensure that the ax1is ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax[i].get_xaxis().tick_bottom()
ax[i].get_yaxis().tick_left()
if len(title) > 0:
f.suptitle(title)
plt.tight_layout()
#plt.savefig('Figure.png', bbox_inches='tight', dpi=300)
def plotTimeSeries(df,title=""):
# Size of 1x graph y-axis
Y=2
mpl.style.use('default')
#df=df.resample('1D').mean()
# Count the number of plots
n=0
plotpm10=False
plotpm25=False
plotso2=False
plotno=False
plotno2=False
ploto3=False
plotwinds=True
if 'PM10' in df.columns and len(df['PM10'].dropna()) > 0:
n=n+1
plotpm10=True
if 'PM2.5' in df.columns and len(df['PM2.5'].dropna()) > 0:
if not plotpm10:
n=n+1
plotpm25=True
if 'SO2' in df.columns and len(df['SO2'].dropna()) > 0:
n=n+1
plotso2=True
if 'NO' in df.columns and len(df['NO'].dropna()) > 0:
n=n+1
plotno=True
if 'NO2' in df.columns and len(df['NO2'].dropna()) > 0:
if not plotno:
n=n+1
plotno2=True
if 'O3' in df.columns and len(df['O3'].dropna()) > 0:
if not plotno and not plotno2:
n=n+1
ploto3=True
if 'WindSpeed' in df.columns and len(df['WindSpeed'].dropna()) > 0 and 'WindDir' in df.columns and len(df['WindDir'].dropna()) > 0:
n=n+1
plotwinds=True
print(n)
ysize=n*Y
f,ax = plt.subplots(n,sharex=True, figsize=[8,ysize]) # fig.add_subplot(1,1,1)
i=0
if plotpm10:
ax[i].plot(df.index,df['PM10'], color='tab:orange', label='PM10 (ug/m3)')
if plotpm25:
ax[i].plot(df.index,df['PM2.5'], color='tab:blue', label='PM10 (ug/m3)')
if plotpm10 or plotpm25:
cleanplot(ax[i])
ax[i].legend(ncol=2)
ax[i].set_ylabel("ug/m3")
i=i+1
if plotso2:
ax[i].plot(df.index,df['SO2'], color='tab:blue', label='SO2 (ppb)')
cleanplot(ax[i])
ax[i].legend()
ax[i].set_ylabel("ppb")
i=i+1
if plotno:
ax[i].plot(df.index,df['NO'], color='tab:blue', label='NO (ppb)')
if plotno2:
ax[i].plot(df.index,df['NO2'], color='tab:green', label='NO2 (ppb)')
if ploto3:
ax[i].plot(df.index,df['O3'], color='tab:orange', label='O3 (ppb)')
if plotno or plotno2 or ploto3:
cleanplot(ax[i])
ax[i].legend()
ax[i].set_ylabel("ppb")
i=i+1
if plotwinds:
axw=ax[i].twinx()
axw.plot(df.index,df['WindDir'], color='tab:orange', label='Wind Direction', alpha=.25)
ax[i].plot(df.index,df['WindSpeed'], color='tab:blue', label='Wind Speed')
ax[i].legend()
axw.legend()
ax[i].set_ylabel("m/s")
axw.set_ylabel("degrees")
axw.spines["top"].set_visible(False)
axw.spines["left"].set_visible(False)
i=i+1
i=i-1
# Hide the spines of the figure
ax[i].spines["top"].set_visible(False)
ax[i].spines["right"].set_visible(False)
# Ensure that the ax1is ticks only show up on the bottom and left of the plot.
# Ticks on the right and top of the plot are generally unnecessary chartjunk.
ax[i].get_xaxis().tick_bottom()
ax[i].get_yaxis().tick_left()
if len(title) > 0:
f.suptitle(title)
plt.tight_layout()
#plt.savefig('Figure.png', bbox_inches='tight', dpi=300)
dfRaw['Date']=pd.to_datetime(dfRaw['Date'])
df1=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='GJA')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df1.index.get_level_values
df1=(df1.groupby([level_values(0)]+[pd.Grouper(freq='1Y', level=-1)]).mean())
df1 =df1.reset_index()
usesites = pd.pivot_table(df1,index='Site',columns=['Date'],values='PM10')
df1 = usesites.dropna()
df1.sort_index()
col_map = plt.get_cmap('tab20c')
ax = df1.plot(kind='bar', figsize=(15,10), width=0.6)
plt.axhline(y = 40, color = 'r',label='PM10 daily Standard', linestyle='--')
plt.plot()
plt.legend(loc='upper left')
plt.suptitle('Annual average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
%matplotlib inline
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='GJA')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='GJA')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([40,40],[m,M], color='tab:red', label='PM2.5 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM25_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='GJA')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='GJA')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
df1=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='MP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df1.index.get_level_values
df1=(df1.groupby([level_values(0)]+[pd.Grouper(freq='1Y', level=-1)]).mean())
df1 =df1.reset_index()
usesites = pd.pivot_table(df1,index='Site',columns=['Date'],values='PM10')
df1 = usesites.dropna()
df1.sort_index()
col_map = plt.get_cmap('tab20c')
ax = df1.plot(kind='bar', figsize=(15,10), width=0.6)
plt.axhline(y = 40, color = 'r',label='PM10 daily Standard', linestyle='--')
plt.plot()
plt.legend(loc='upper left')
plt.suptitle('Annual average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='MP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='MP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([40,40],[m,M], color='tab:red', label='PM2.5 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM25_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='MP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='MP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
df1=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='LP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df1.index.get_level_values
df1=(df1.groupby([level_values(0)]+[pd.Grouper(freq='1Y', level=-1)]).mean())
df1 =df1.reset_index()
usesites = pd.pivot_table(df1,index='Site',columns=['Date'],values='PM10')
df1 = usesites
df1.sort_index()
col_map = plt.get_cmap('tab20c')
ax = df1.plot(kind='bar', figsize=(15,10), width=0.6)
plt.axhline(y = 40, color = 'r',label='PM10 daily Standard', linestyle='--')
plt.plot()
plt.legend(loc='upper left')
plt.suptitle('Annual average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='LP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='LP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([40,40],[m,M], color='tab:red', label='PM2.5 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM25_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='LP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
usesites.remove('Steelpoort')
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='LP')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
df1=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='NW')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df1.index.get_level_values
df1=(df1.groupby([level_values(0)]+[pd.Grouper(freq='1Y', level=-1)]).mean())
df1 =df1.reset_index()
usesites = pd.pivot_table(df1,index='Site',columns=['Date'],values='PM10')
df1 = usesites
df1.sort_index()
col_map = plt.get_cmap('tab20c')
ax = df1.plot(kind='bar', figsize=(15,10), width=0.6)
plt.axhline(y = 40, color = 'r',label='PM10 daily Standard', linestyle='--')
plt.plot()
plt.legend(loc='upper left')
plt.suptitle('Annual average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='NW')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='NW')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1D', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,250)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
plt.plot([40,40],[m,M], color='tab:red', label='PM2.5 daily Standard', linestyle='--')
plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM25_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM10')&(dfRaw['Authority']=='NW')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM10').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM10',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
# Get daily averages from short format quickly
df4=pd.pivot_table(dfRaw[(dfRaw['Status']==0)&(dfRaw['Variable']=='PM2.5')&(dfRaw['Authority']=='NW')],index=['Site','Date'],columns=['Variable'],values='Value')
level_values=df4.index.get_level_values
df4=(df4.groupby([level_values(i) for i in [0]]+[pd.Grouper(freq='1H', level=-1)]).mean())
df4=df4.reset_index()
goodsites=pd.pivot_table(df4,index='Date',columns=['Site'],values='PM2.5').count()
usesites=list(goodsites[goodsites>500].index.values)
df4=df4[df4['Site'].isin(usesites)]
ax=df4.boxplot(column='PM2.5',by='Site', vert=False, figsize=(8,10), return_type='axes')
ax[0].spines["top"].set_visible(False)
ax[0].spines["bottom"].set_visible(False)
ax[0].spines["right"].set_visible(False)
ax[0].set_xlim(0,1000)
ax[0].grid(False)
ax[0].set_xlabel("Concentration (ug/m3)")
(m,M)=ax[0].get_ylim()
#plt.plot([75,75],[m,M], color='tab:red', label='PM10 daily Standard', linestyle='--')
#plt.legend(loc='upper left', bbox_to_anchor=(0,1.03))
plt.suptitle('Daily average for available data')
plt.tight_layout()
#plt.savefig('GJA_PM10_daily.png', bbox_inches='tight',format='png', dpi=300)
import matplotlib.pyplot as plt
import numpy as np
from scipy import interpolate
wd = [90, 297, 309, 336, 20, 2, 334, 327, 117, 125, 122, 97, 95, 97, 103, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
ws = [15, 1.6, 1.8, 1.7, 2.1, 1.6, 2.1, 1.4, 3, 6.5, 7.1, 8.2, 10.2, 10.2, 10.8, 10.2, 11.4, 9.7, 8.6, 7.1, 6.4, 5.5, 5, 5, 6]
oz = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 90, 140, 100, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
wd_rad = np.radians(np.array(wd))
oz = np.array(oz, dtype=np.float)
WD, WS = np.meshgrid(np.linspace(0, 2*np.pi, 36), np.linspace(min(ws), max(ws), 16 ))
Z = interpolate.griddata((wd_rad, ws), oz, (WD, WS), method='linear')
fig, ax = plt.subplots(subplot_kw={"projection": "polar"})
cmap = plt.get_cmap('hot')
cmap.set_under('none')
img = ax.pcolormesh(WD, WS, Z, cmap=cmap, vmin=20)
plt.colorbar(img)
plt.show()
/tmp/ipykernel_115185/617883357.py:9: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
oz = np.array(oz, dtype=np.float)
/tmp/ipykernel_115185/617883357.py:17: MatplotlibDeprecationWarning: You are modifying the state of a globally registered colormap. This has been deprecated since 3.3 and in 3.6, you will not be able to modify a registered colormap in-place. To remove this warning, you can make a copy of the colormap first. cmap = mpl.cm.get_cmap("hot").copy()
cmap.set_under('none')
/tmp/ipykernel_115185/617883357.py:18: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first.
img = ax.pcolormesh(WD, WS, Z, cmap=cmap, vmin=20)
# Define the settings for the windrose that is being created.
def plot_ppollrose(dfc,WD="WindDir",WS="WindSpeed",CO="PM10",title="",output=Path("polar.png")):
dfc=dfc[[WD,WS,CO]].dropna()
wd_rad = np.radians(np.array(dfc[WD]))
Conc = np.array(dfc[CO], dtype=float)
WSmax = max(dfc[WS])
WD1, WS1 = np.meshgrid(np.linspace(0, 2*np.pi, 360), np.linspace(min(dfc[WS]), max(dfc[WS]), 16))
Z = interpolate.griddata((wd_rad, dfc[WS]), dfc[CO], (WD1, WS1), method='linear')
levels = MaxNLocator(nbins=10).tick_values(0, 500)
cmap = plt.get_cmap('viridis')
norm = BoundaryNorm(levels, ncolors=cmap.N, clip=True)
fig, ax = plt.subplots(figsize=(4,4), subplot_kw={"projection": "polar"})
img = ax.pcolormesh(WD1, WS1, Z, cmap=cmap, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round')
cbar = plt.colorbar(img, cax=None, ax=None, use_gridspec=True, orientation='vertical', drawedges=True, pad=0.09, shrink=0.5)
cbar.ax.tick_params(labelsize=10)
cbar.set_label(CO,size=10, labelpad=+10 )
#To display the grid in the background.
ax.grid(True)
# Specify different settings for major and minor grids
#ax.grid(which = 'minor', alpha = 0.3)
#ax.grid(which = 'major', alpha = 0.7)
major_yticks = np.arange(0, 15, step=2.5)
#minor_yticks = np.arange(0, 15, step=0.5)
#major_xticks = np.arange(0, 8, step=360)
#minor_xticks = np.arange(0, 360, step=22.5)
ax.tick_params(axis = 'both', which = 'major', labelsize = 10, width=1.5, direction='out', pad=0.8, labelrotation=0, top=True, grid_color='white', grid_linewidth=1)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 0)
ax.set_yticks(major_yticks)
ax.set_ylim([0,WSmax])
#ax.set_yticks(minor_yticks, minor=True)
#ax.set_xticks(major_xticks)
#ax.set_xticks(minor_xticks, minor=True)
# To rename the x-axis from Degrees to Directions.
ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
# To move the 0 Deg from the E-point to the N-point.
ax.set_theta_zero_location("N")
ax.set_theta_direction(-1)
ax.set_axisbelow(False)
fig.suptitle(title)
plt.tight_layout()
#plt.savefig(output,dpi=300, bbox_inches = "tight")
plt.show()
#plt.close()
plot_ppollrose(dftmp,CO='PM10', title='Jabavu')
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
Stations
array(['Alexandra', 'Bedfordview', 'Bodibeng', 'Booysens', 'Buccleugh',
'Diepkloof', 'Diepsloot', 'Ekandustria', 'Etwatwa', 'Hammanskraal',
'Ivory_park', 'Jabavu', 'Kliprivier', 'Leondale_City', 'Mamelodi',
'Meyerton', 'Mogale_City', 'NWU_Vaal', 'Olievenhoutbosch',
'Olifantsfontein', 'Orange_Farm', 'PTA_West', 'Randfontein',
'Randwater', 'Rosslyn', 'Sebokeng', 'Sharpeville', 'Springs',
'Springs-new', 'Tembisa', 'Thokoza', 'Three Rivers', 'Tsakane',
'Tswane_Market', 'Vanderbijlpark', 'Wattville'], dtype=object)
#Sites
dfRaw[dfRaw['Authority']=='GJA']['Site'].unique()
array(['Diepkloof', 'Kliprivier', 'Sebokeng', 'Sharpeville',
'Three Rivers', 'Etwatwa', 'Olifantsfontein', 'Springs', 'Tsakane',
'Wattville', 'Booysens', 'Ekandustria', 'Mamelodi', 'Rosslyn',
'Bodibeng', 'Mogale_City', 'Olievenhoutbosch', 'PTA_West',
'Hammanskraal', 'Thokoza-NAQI', 'Tswane_Market', 'Randwater',
'Bedfordview', 'Buccleugh', 'Tembisa', 'Randfontein',
'Leondale_City', 'Jabavu', 'Meyerton', 'Vanderbijlpark',
'Diepsloot', 'Springs-new', 'Orange_Farm', 'Alexandra', 'NWU_Vaal',
'Ivory_park'], dtype=object)
dfGJA=dfRaw[dfRaw["Site"].str.contains('(Jabavu|Diepkloof|Springs|Olifantsfontein)', regex=True)]
/tmp/ipykernel_109379/3041542741.py:1: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.
dfGJA=dfRaw[dfRaw["Site"].str.contains('(Jabavu|Diepkloof|Springs|Olifantsfontein)', regex=True)]
dfGJA['Site'].unique()
array(['Diepkloof', 'Olifantsfontein', 'Springs', 'Jabavu', 'Springs-new'],
dtype=object)
GJAsites=['Diepkloof', 'Jabavu', 'Olifantsfontein','Springs', 'Springs-new',
'Kliprivier', 'Sebokeng', 'Sharpeville',
'Three Rivers', 'Etwatwa', 'Olifantsfontein', 'Tsakane',
'Wattville', 'Booysens', 'Ekandustria', 'Rosslyn',
'Bodibeng', 'Mogale_City', 'Olievenhoutbosch', 'PTA_West',
'Hammanskraal', 'Thokoza', 'Tswane_Market', 'Randwater',
'Bedfordview', 'Buccleugh', 'Tembisa', 'Randfontein',
'Leondale_City', 'Meyerton', 'Vanderbijlpark',
'Diepsloot', 'Orange_Farm', 'Alexandra', 'NWU_Vaal',
'Ivory_park']
Threshold=65
Data=[]
for s in GJAsites:
for v in ['PM10','PM2.5']:
df=extractStation(dfRaw,v,s,Status=0)
# Daily averages
tThres=df['Value'].dropna().quantile(0.99)
tValue=df['Value'].dropna().resample('1D').mean()
tCount=df['Value'].dropna().resample('1D').count()
tDaily=pd.concat([tValue,tCount],axis=1)
tDaily.columns=['Mean','N']
tDaily.loc[tDaily['N']/24*100<Threshold,'Mean']=np.nan
# Monthly averages
tValue=tDaily['Mean'].dropna().resample('1M').mean()
tCount=tDaily['Mean'].dropna().resample('1M').count()
tMonthly=pd.concat([tValue,tCount],axis=1)
tMonthly.columns=['Mean','N']
tMonthly.loc[tMonthly['N']/tMonthly.index.days_in_month*100<Threshold,'Mean']=np.nan
# Annual average
tValue=tMonthly['Mean'].dropna().resample('1Y').mean()
tCount=tMonthly['Mean'].dropna().resample('1Y').count()
tAnnual=pd.concat([tValue,tCount],axis=1)
tAnnual.columns=['Mean','N']
tAnnual.loc[tAnnual['N']/12*100<Threshold,'Mean']=np.nan
#
if len(tAnnual['Mean'].dropna()) > 0:
Data.append([s,v,tAnnual['Mean'].mean(),tAnnual['Mean'].count()])
else:
tMonthly['Month']=tMonthly.index.month
tMonthly=tMonthly.groupby('Month').mean()[['Mean']]
if tMonthly['Mean'].count()/12*100>Threshold:
Data.append([s,v,tMonthly['Mean'].mean(),tMonthly['Mean'].count()])
dfAnn=pd.pivot_table(pd.DataFrame(data=Data, columns=['Site','Variable','Annual','N']),index='Site',columns='Variable',values=['Annual','N'])
dfAnn
| Annual | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Alexandra | 69.000843 | 35.930254 | 1.0 | 1.0 |
| Bedfordview | 48.984103 | 27.646535 | 3.0 | 3.0 |
| Bodibeng | 69.062161 | 30.225458 | 10.0 | 1.0 |
| Booysens | 72.324412 | NaN | 10.0 | NaN |
| Buccleugh | 41.742300 | NaN | 8.0 | NaN |
| Diepkloof | 37.803183 | 24.196835 | 5.0 | 3.0 |
| Diepsloot | 43.107692 | NaN | 2.0 | NaN |
| Etwatwa | 98.780250 | 52.327086 | 2.0 | 2.0 |
| Hammanskraal | 63.237761 | 26.819970 | 1.0 | 1.0 |
| Ivory_park | 49.146207 | NaN | 11.0 | NaN |
| Jabavu | 66.961013 | 40.984532 | 4.0 | 3.0 |
| Kliprivier | 56.798591 | 38.340758 | 3.0 | 2.0 |
| NWU_Vaal | 44.171942 | 26.236204 | 2.0 | 2.0 |
| Olievenhoutbosch | 97.224015 | 38.684320 | 10.0 | 1.0 |
| Olifantsfontein | 70.700723 | 33.903615 | 2.0 | 2.0 |
| Orange_Farm | 42.971296 | NaN | 11.0 | NaN |
| PTA_West | 29.572599 | NaN | 1.0 | NaN |
| Randwater | NaN | 21.012719 | NaN | 11.0 |
| Rosslyn | 33.412865 | 25.575655 | 2.0 | 1.0 |
| Sebokeng | 39.084772 | 31.259781 | 1.0 | 1.0 |
| Sharpeville | 57.828645 | 36.579435 | 1.0 | 1.0 |
| Springs | 39.628488 | 32.416056 | 2.0 | 1.0 |
| Three Rivers | 61.341365 | 27.507806 | 3.0 | 3.0 |
| Tswane_Market | 47.084085 | 35.883437 | 1.0 | 1.0 |
| Vanderbijlpark | 69.430570 | 30.752688 | 4.0 | 2.0 |
dfAnn.sort_values(('Annual','PM10'), ascending=False, inplace=True)
ax=dfAnn['Annual'].plot(kind='bar',figsize=(10,5))
plt.plot(dfAnn.index, [40 for x in dfAnn.index], color='tab:blue', label='PM10 Standard', linestyle='--')
plt.plot(dfAnn.index, [20 for x in dfAnn.index], color='tab:orange', label='PM2.5 Standard', linestyle='--')
plt.legend()
ax.spines["top"].set_visible(False)
#ax.spines["bottom"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_ylabel("Concentration (ug/m3)")
ax.set_xlabel("")
#ax.set_xlim(0,250)
ax.grid(False)
plt.tight_layout()
#plt.savefig('GJA_PM_annual.png', bbox_inches='tight',format='png', dpi=300)
pd.options.display.float_format = '{:,.0f}'.format
dfAnn.fillna('')
| Annual | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Etwatwa | 99 | 52 | 2 | 2 |
| Olievenhoutbosch | 97 | 39 | 10 | 1 |
| Booysens | 72 | 10 | ||
| Olifantsfontein | 71 | 34 | 2 | 2 |
| Vanderbijlpark | 69 | 31 | 4 | 2 |
| Bodibeng | 69 | 30 | 10 | 1 |
| Alexandra | 69 | 36 | 1 | 1 |
| Jabavu | 67 | 41 | 4 | 3 |
| Hammanskraal | 63 | 27 | 1 | 1 |
| Three Rivers | 61 | 28 | 3 | 3 |
| Sharpeville | 58 | 37 | 1 | 1 |
| Kliprivier | 57 | 38 | 3 | 2 |
| Ivory_park | 49 | 11 | ||
| Bedfordview | 49 | 28 | 3 | 3 |
| Tswane_Market | 47 | 36 | 1 | 1 |
| NWU_Vaal | 44 | 26 | 2 | 2 |
| Diepsloot | 43 | 2 | ||
| Orange_Farm | 43 | 11 | ||
| Buccleugh | 42 | 8 | ||
| Springs | 40 | 32 | 2 | 1 |
| Sebokeng | 39 | 31 | 1 | 1 |
| Diepkloof | 38 | 24 | 5 | 3 |
| Rosslyn | 33 | 26 | 2 | 1 |
| PTA_West | 30 | 1 | ||
| Randwater | 21 | 11 | ||
len(df['2017'])
0
df['Value'].dropna().quantile(0.99)
nan
int(10<5)
0
Sites = GJAsites
for s in Sites:
print(",{},,,".format(s),end="")
,Diepkloof,,,,Jabavu,,,,Olifantsfontein,,,,Springs,,,,Springs-new,,,,Kliprivier,,,,Sebokeng,,,,Sharpeville,,,,Three Rivers,,,,Etwatwa,,,,Olifantsfontein,,,,Tsakane,,,,Wattville,,,,Booysens,,,,Ekandustria,,,,Rosslyn,,,,Bodibeng,,,,Mogale_City,,,,Olievenhoutbosch,,,,PTA_West,,,,Hammanskraal,,,,Thokoza,,,,Tswane_Market,,,,Randwater,,,,Bedfordview,,,,Buccleugh,,,,Tembisa,,,,Randfontein,,,,Leondale_City,,,,Meyerton,,,,Vanderbijlpark,,,,Diepsloot,,,,Orange_Farm,,,,Alexandra,,,,NWU_Vaal,,,,Ivory_park,,,
int(10>5)
1
Show the number of exceedances per year
# 0 above 70% and no exceedances
# -1 not 70% data recovery
# 1 exceeds hourly
# 2 exceeds daily
# 3 exceeds hourly and daily
# 4 exceeds annual
# 5 exceeds hourly and annual
# 6 exceeds daily and annual
# 7 exceeds hourly, daily, and annual
vars=['PM10','PM2.5']
standard=[[134,48,19],[106,0,21],[0,75,40],[0,40,20]]
for s in Sites:
print(",{}".format(s),end="")
print("")
for v in range(len(vars)):
print(vars[v])
print("",end="")
for y in range(2017,2021):
print(y,end="")
for s in Sites:
df=extractStation(dfRaw,vars[v],s,Status=0)
df=df[['Value']]
try:
dfy=df['{}-01-01'.format(y):'{}-12-31'.format(y)]
# is there an hourly standard
if standard[v][0] > 0:
HDA=len(dfy['Value'].dropna())/(365*24)*100
HComply=dfy['Value'].dropna().quantile(0.99)>standard[v][0]
else:
HDA=len(dfy['Value'].dropna())/(365*24)*100
HComply=False
# is there an daily standard
if standard[v][1] > 0:
DDA=len(dfy['Value'].dropna().resample('1D').mean())/365*100
DComply=dfy['Value'].dropna().resample('1D').mean().quantile(0.99)>standard[v][1]
else:
DDA=len(dfy['Value'].dropna().resample('1D').mean())/365*100
DComply=False
# is there an annual standard
if standard[v][2] > 0:
AComply=dfy['Value'].mean()>standard[v][2]
else:
AComply=False
if HDA < DDA:
DA=HDA
else:
DA=DDA
index=0
if HComply and not DComply and not AComply:
index=1
if DComply and not HComply and not AComply:
index=2
if HComply and DComply and not AComply:
index=3
if AComply and not HComply and not DComply:
index=4
if HComply and AComply and not DComply:
index=5
if DComply and AComply and not HComply:
index=6
if HComply and DComply and AComply:
index=7
if DA< 70 and index==0:
index=-10
if DA<70 and index>0:
index=index*-1
print(",{:.0f}".format(index), end="")
#break
#print(y,s)
except:
print(",-10", end="")
print("")
,Diepkloof,Jabavu,Olifantsfontein,Springs,Springs-new,Kliprivier,Sebokeng,Sharpeville,Three Rivers,Etwatwa,Olifantsfontein,Tsakane,Wattville,Booysens,Ekandustria,Rosslyn,Bodibeng,Mogale_City,Olievenhoutbosch,PTA_West,Hammanskraal,Thokoza,Tswane_Market,Randwater,Bedfordview,Buccleugh,Tembisa,Randfontein,Leondale_City,Meyerton,Vanderbijlpark,Diepsloot,Orange_Farm,Alexandra,NWU_Vaal,Ivory_park PM10 2017,-6,-7,-7,-7,-10,-7,-7,-7,-7,-7,-7,-10,-7,-7,-10,-7,-7,-6,-7,-10,-7,-10,-10,-6,-10,-10,-7,-6,-10,-7,-7,-7,-7,-6,-10,-10 2018,6,7,-7,7,-10,7,-7,7,7,-7,-7,-10,-7,-7,-10,-10,-7,-10,-7,-10,7,-10,-10,-10,-10,-10,-10,-10,-10,-6,7,7,-10,-10,-10,-10 2019,7,7,7,-7,-10,-7,-7,-7,-7,7,7,-10,-10,-7,-10,-7,-10,-10,-7,-10,-7,-10,-10,-10,7,-6,-10,-10,-10,-10,7,-7,-7,-7,-7,-7 2020,7,7,7,7,-10,-6,-7,-7,7,7,7,-10,-10,-10,-10,7,-10,-10,-10,-7,-7,-10,7,-10,7,-7,-7,-10,-10,-7,7,7,-7,-7,7,-7 PM2.5 2017,5,-10,-10,-10,-10,-5,-5,-5,-4,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-5,-10,-10,-10,-10,-10,-4,-10,-10,-10,-5,-10,-10 2018,4,-4,-5,-10,-10,5,5,5,4,-10,-5,-10,-10,-10,-10,-10,-5,-10,5,-10,5,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-10,-5,-10,-10 2019,4,5,5,-5,-10,-5,-5,-5,-5,5,5,-10,-10,-10,-10,-4,-5,-10,-5,-10,-5,-10,-10,-10,4,-10,-10,-10,-10,-10,-5,-10,-10,-5,-10,-10 2020,-4,5,5,5,-10,-4,-4,-5,4,5,5,-10,-10,-10,-4,5,5,-10,-10,-10,5,-10,-5,-10,4,-4,-10,-10,-10,-5,5,-10,-10,-5,5,-10
dfy
| Value | |
|---|---|
| Date |
Threshold=65
Data=[]
for s in GJAsites:
for v in ['PM10','PM2.5']:
df=extractStation(dfRaw,v,s,Status=0)
# Daily averages
tThres=df['Value'].dropna().quantile(0.99)
tValue=df['Value'].dropna().resample('1D').mean()
tCount=df['Value'].dropna().resample('1D').count()
tDaily=pd.concat([tValue,tCount],axis=1)
tDaily.columns=['Mean','N']
tDaily.loc[tDaily['N']/24*100<Threshold,'Mean']=np.nan
# Monthly averages
tValue=tDaily['Mean'].dropna().resample('1M').mean()
tCount=tDaily['Mean'].dropna().resample('1M').count()
tMonthly=pd.concat([tValue,tCount],axis=1)
tMonthly.columns=['Mean','N']
tMonthly.loc[tMonthly['N']/tMonthly.index.days_in_month*100<Threshold,'Mean']=np.nan
# Annual average
tValue=tMonthly['Mean'].dropna().resample('1Y').mean()
tCount=tMonthly['Mean'].dropna().resample('1Y').count()
tAnnual=pd.concat([tValue,tCount],axis=1)
tAnnual.columns=['Mean','N']
tAnnual.loc[tAnnual['N']/12*100<Threshold,'Mean']=np.nan
#
if len(tAnnual['Mean'].dropna()) > 0:
Data.append([s,v,tAnnual['Mean'].mean(),tAnnual['Mean'].count()])
else:
tMonthly['Month']=tMonthly.index.month
tMonthly=tMonthly.groupby('Month').mean()[['Mean']]
if tMonthly['Mean'].count()/12*100>Threshold:
Data.append([s,v,tMonthly['Mean'].mean(),tMonthly['Mean'].count()])
dfAnn=pd.pivot_table(pd.DataFrame(data=Data, columns=['Site','Variable','Annual','N']),index='Site',columns='Variable',values=['Annual','N'])
dfAnn
| Annual | N | |||
|---|---|---|---|---|
| Variable | PM10 | PM2.5 | PM10 | PM2.5 |
| Site | ||||
| Alexandra | 69 | 36 | 1 | 1 |
| Bedfordview | 49 | 28 | 3 | 3 |
| Bodibeng | 69 | 30 | 10 | 1 |
| Booysens | 72 | NaN | 10 | NaN |
| Buccleugh | 42 | NaN | 8 | NaN |
| Diepkloof | 38 | 24 | 5 | 3 |
| Diepsloot | 43 | NaN | 2 | NaN |
| Etwatwa | 99 | 52 | 2 | 2 |
| Hammanskraal | 63 | 27 | 1 | 1 |
| Ivory_park | 49 | NaN | 11 | NaN |
| Jabavu | 67 | 41 | 4 | 3 |
| Kliprivier | 57 | 38 | 3 | 2 |
| NWU_Vaal | 44 | 26 | 2 | 2 |
| Olievenhoutbosch | 97 | 39 | 10 | 1 |
| Olifantsfontein | 71 | 34 | 2 | 2 |
| Orange_Farm | 43 | NaN | 11 | NaN |
| PTA_West | 30 | NaN | 1 | NaN |
| Randwater | NaN | 21 | NaN | 11 |
| Rosslyn | 33 | 26 | 2 | 1 |
| Sebokeng | 39 | 31 | 1 | 1 |
| Sharpeville | 58 | 37 | 1 | 1 |
| Springs | 40 | 32 | 2 | 1 |
| Three Rivers | 61 | 28 | 3 | 3 |
| Tswane_Market | 47 | 36 | 1 | 1 |
| Vanderbijlpark | 69 | 31 | 4 | 2 |
Campaigns=[['2021-04-21','2021-05-29'],['2021-06-27','2021-08-20']]
dftmp=extractStationVars(dfRaw,Site='Jabavu')
dftmp.tail()
| Variable | O3 | PM10 | PM2.5 | RH | SO2 | SolarRadiation | Temperature | WindDir | WindSpeed |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2021-10-02 08:00:00 | NaN | NaN | NaN | 98 | NaN | 27 | 16 | 271 | 1 |
| 2021-10-02 09:00:00 | NaN | NaN | NaN | 98 | NaN | 87 | 17 | 274 | 2 |
| 2021-10-02 10:00:00 | NaN | NaN | NaN | 98 | NaN | 60 | 17 | 288 | 2 |
| 2021-10-02 11:00:00 | NaN | NaN | NaN | 98 | NaN | 93 | 17 | 301 | 2 |
| 2021-10-02 12:00:00 | NaN | NaN | NaN | 97 | NaN | 456 | 20 | 292 | 2 |
dftmp[Campaigns[0][0]:Campaigns[0][1]]
| Variable | O3 | PM10 | PM2.5 | RH | SO2 | SolarRadiation | Temperature | WindDir | WindSpeed |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2021-04-21 00:00:00 | 8 | 175 | 106 | 62 | NaN | 0 | 16 | 44 | 1 |
| 2021-04-21 01:00:00 | 11 | 122 | 71 | 64 | NaN | 0 | 16 | 29 | NaN |
| 2021-04-21 02:00:00 | 17 | 38 | 20 | 70 | NaN | 0 | 15 | 59 | 1 |
| 2021-04-21 03:00:00 | 19 | 37 | 19 | 76 | NaN | 0 | 15 | 65 | NaN |
| 2021-04-21 04:00:00 | 15 | 36 | 19 | 90 | NaN | 0 | 14 | 61 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2021-05-29 19:00:00 | 6 | 220 | 117 | 26 | 12 | 0 | 17 | NaN | NaN |
| 2021-05-29 20:00:00 | 3 | 134 | 78 | 30 | 17 | 0 | 15 | NaN | NaN |
| 2021-05-29 21:00:00 | 3 | 169 | 106 | 31 | 8 | 0 | 15 | NaN | NaN |
| 2021-05-29 22:00:00 | 3 | 203 | 130 | 34 | 10 | 0 | 13 | NaN | NaN |
| 2021-05-29 23:00:00 | 2 | 197 | 126 | 35 | 9 | 0 | 13 | NaN | NaN |
936 rows × 9 columns
plotTimeSeries(dftmp[Campaigns[0][0]:Campaigns[0][1]],title="Jabavu")
4
plotTimeSeriesPeriod(dftmp,start=Campaigns[0][0], end=Campaigns[0][1], title="Jabavu Autumn Campaign")
plotTimeSeriesPeriod(dftmp,start=Campaigns[1][0], end=Campaigns[1][1], title="Jabavu Winter Campaign")
plotTimeSeries(dftmp[Campaigns[0][0]:Campaigns[0][1]],title="Jabavu Autumn Campaign")
4
plotTimeSeries(dftmp[Campaigns[1][0]:Campaigns[1][1]],title="Jabavu Winter Campaign")
4
plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM10', title="Jabavu Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM2.5', title="Jabavu Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM10', title="Jabavu Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM2.5', title="Jabavu Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
Campaigns=[['2021-04-21','2021-05-29'],['2021-07-31','2021-08-28']]
dftmp=extractStationVars(dfRaw,Site='Diepkloof')
dftmp.head()
| Variable | CO | NO | NO2 | NOx | O3 | PM10 | PM2.5 | RH | SO2 | SolarRadiation | Temperature | WindDir | WindSpeed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||
| 2016-01-01 01:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 30 | NaN | 0 | 23 | 210 | 3 |
| 2016-01-01 02:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 34 | NaN | 0 | 22 | 14 | 3 |
| 2016-01-01 03:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 38 | NaN | 0 | 21 | 243 | 2 |
| 2016-01-01 04:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 40 | NaN | 0 | 21 | 344 | 3 |
| 2016-01-01 05:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 41 | NaN | 0 | 21 | 332 | 3 |
plotTimeSeriesPeriod(dftmp,start=Campaigns[0][0], end=Campaigns[0][1], title="Diepkloof Autumn Campaign")
plotTimeSeriesPeriod(dftmp,start=Campaigns[1][0], end=Campaigns[1][1], title="Diepkloof Winter Campaign")
plotTimeSeries(dftmp[Campaigns[0][0]:Campaigns[0][1]],title="Diepkloof Autumn Campaign")
4
plotTimeSeries(dftmp[Campaigns[1][0]:Campaigns[1][1]],title="Diepkloof Winter Campaign")
4
plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM10', title="Diepkloof Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM2.5', title="Diepkloof Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM10', title="Diepkloof Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM2.5', title="Diepkloof Autumn Campaign")
/tmp/ipykernel_109379/3348448403.py:17: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first. img = ax.pcolormesh(WD1, WS1, Z, norm=norm, edgecolor='none', linewidth=0, alpha=1, shading='nearest', snap='bool', capstyle='round') /tmp/ipykernel_109379/3348448403.py:44: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(['N', 'N-E', 'E', 'S-E', 'S', 'S-W', 'W', 'N-W'], fontsize=10)
Campaigns=[['2021-01-17','2021-03-03'],['2021-06-16','2021-07-28']]
dftmp=extractStationVars(dfRaw,Site='Springs')
dftmp.head()
| Variable | CO | NO | NO2 | NOx | O3 | PM10 | PM2.5 | RH | SO2 | SolarRadiation | Temperature |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||
| 2017-08-02 13:00:00 | 5 | NaN | 79 | NaN | NaN | NaN | NaN | NaN | 85 | NaN | NaN |
| 2017-08-02 14:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9 | NaN | NaN |
| 2017-08-02 15:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 13 | NaN | NaN |
| 2017-08-02 16:00:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5 | NaN | NaN |
| 2017-08-02 17:00:00 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 5 | NaN | NaN |
plotTimeSeriesPeriod(dftmp,start=Campaigns[0][0], end=Campaigns[0][1], title="Springs Summer Campaign")
plotTimeSeriesPeriod(dftmp,start=Campaigns[1][0], end=Campaigns[1][1], title="Springs Winter Campaign")
#plotTimeSeries(dftmp[Campaigns[0][0]:Campaigns[0][1]],title="Springs Summer Campaign")
#plotTimeSeries(dftmp[Campaigns[1][0]:Campaigns[1][1]],title="Springs Winter Campaign")
#plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM10', title="Springs Autumn Campaign")
#plot_ppollrose(dftmp[Campaigns[0][0]:Campaigns[0][1]],CO='PM2.5', title="Springs Autumn Campaign")
#plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM10', title="Springs Autumn Campaign")
#plot_ppollrose(dftmp[Campaigns[1][0]:Campaigns[1][1]],CO='PM2.5', title="Springs Autumn Campaign")
Campaigns=[['2021-04-21','2021-06-04'],['2021-06-16','2021-07-28']]
dftmp=extractStationVars(dfRaw,Site='Olifantsfontein')
dftmp.head()
| Variable | CO | NO | NO2 | NOx | PM10 | PM2.5 | RH | SO2 | SolarRadiation | Temperature | WindDir | WindSpeed |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||
| 2017-08-01 01:00:00 | NaN | NaN | NaN | NaN | 39 | NaN | 80 | NaN | 0 | 9 | 18 | 1 |
| 2017-08-01 02:00:00 | NaN | NaN | NaN | NaN | 38 | NaN | 86 | NaN | 0 | 8 | 53 | 1 |
| 2017-08-01 03:00:00 | NaN | NaN | NaN | NaN | 32 | NaN | 91 | NaN | 0 | 8 | 75 | 1 |
| 2017-08-01 04:00:00 | NaN | NaN | NaN | NaN | 32 | NaN | 92 | NaN | 0 | 9 | 63 | 1 |
| 2017-08-01 05:00:00 | NaN | NaN | NaN | NaN | 25 | NaN | 96 | NaN | 0 | 7 | 115 | 1 |
plotTimeSeriesPeriod(dftmp,start=Campaigns[0][0], end=Campaigns[0][1], title="Olifantsfontein Autumn Campaign")
plotTimeSeriesPeriod(dftmp,start=Campaigns[1][0], end=Campaigns[1][1], title="Olifantsfontein Winter Campaign")
#plotTimeSeries(dftmp[Campaigns[0][0]:Campaigns[0][1]],title="Olifantsfontein Autumn Campaign")
#plotTimeSeries(dftmp[Campaigns[1][0]:Campaigns[1][1]],title="Olifantsfontein Autumn Campaign")